Example #1
0
 def test_localopt(self):
     # Overwrite site 1 with an H placed at (0, 0, 1.05), run the adaptor's
     # local optimization and check that every site after the first ends up
     # at the expected distance from the first site (2 decimal places).
     self.mol[1] = "H", [0, 0, 1.05]
     babel = BabelMolAdaptor(self.mol)
     babel.localopt()
     relaxed = babel.pymatgen_mol
     for outer_site in relaxed[1:]:
         separation = outer_site.distance(relaxed[0])
         self.assertAlmostEqual(separation, 1.09216, 2)
Example #2
0
 def test_rotor_search_rrs(self):
     # Operate on a deep copy so the shared ``self.mol`` fixture is not
     # modified by this test.
     work_mol = copy.deepcopy(self.mol)
     work_mol[1] = "H", [0, 0, 1.05]
     babel = BabelMolAdaptor(work_mol)
     babel.rotor_conformer(250, 50, algo="RandomRotorSearch")
     searched = babel.pymatgen_mol
     # Every site after the first must sit at the expected distance from
     # the first site, to 1 decimal place.
     for outer_site in searched[1:]:
         separation = outer_site.distance(searched[0])
         self.assertAlmostEqual(separation, 1.09216, 1)
Example #3
0
 def test_confab_conformers(self):
     # Build the molecule from the SMILES string "CCCC", generate 3D
     # coordinates and ask the adaptor for its confab conformers.
     mol = pb.readstring("smi", "CCCC").OBMol
     adaptor = BabelMolAdaptor(mol)
     adaptor.make3d()
     conformers = adaptor.confab_conformers()
     # assertEqual replaces the deprecated alias assertEquals, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(adaptor.openbabel_mol.NumRotors(), 1)
     self.assertGreaterEqual(len(conformers), 1)
     if len(conformers) > 1:
         # Two different conformers must not have a (near) zero RMSD.
         self.assertNotAlmostEqual(
             MoleculeMatcher().get_rmsd(conformers[0], conformers[1]), 0)
Example #4
0
 def test_from_molecule_graph(self):
     # Wrap the fixture molecule in a graph created by with_empty_graph,
     # convert it through BabelMolAdaptor.from_molecule_graph and verify
     # both the OpenBabel view and the pymatgen view of the result.
     empty_graph = MoleculeGraph.with_empty_graph(self.mol)
     babel = BabelMolAdaptor.from_molecule_graph(empty_graph)
     ob_view = babel.openbabel_mol
     self.assertEqual(ob_view.NumAtoms(), 5)
     pmg_view = babel.pymatgen_mol
     self.assertEqual(pmg_view.formula, "H4 C1")
Example #5
0
    def restore_site_properties(self, site_property="ff_map", filename=None):
        """
        Restore the site properties for the final packed molecule.

        The packed pdb file is read back and split into residues. Each
        residue is converted with ``convert_obatoms_to_molecule`` and its
        sites are appended, in residue order, to a single molecule.

        Args:
            site_property (str): name of the site property, forwarded to
                ``convert_obatoms_to_molecule``.
            filename (str): path to the final packed molecule. Defaults to
                ``self.control_params["output"]`` when not given.

        Returns:
            Molecule

        Raises:
            ValueError: if ``self.control_params["filetype"]`` is not "pdb".
            AssertionError: if the number of residues read from the file
                differs from the sum of ``"number"`` over ``self.param_list``.
        """

        # only for pdb
        filetype = self.control_params["filetype"]
        if filetype != "pdb":
            # The original used a bare ``raise`` here, which (with no active
            # exception) only produced an unrelated "No active exception to
            # re-raise" error; raise something meaningful instead.
            raise ValueError(
                "Site properties can only be restored from pdb files, "
                "got filetype {!r}".format(filetype))

        filename = filename or self.control_params["output"]
        bma = BabelMolAdaptor.from_file(filename, "pdb")
        pbm = pb.Molecule(bma._obmol)
        residues = pbm.residues

        assert len(residues) == sum(x["number"] for x in self.param_list)

        # The first residue seeds the packed molecule; the sites of all
        # remaining residues are appended to it one by one.
        packed_mol = self.convert_obatoms_to_molecule(
            residues[0].atoms, residue_name=residues[0].name,
            site_property=site_property)

        for resid in residues[1:]:
            mol = self.convert_obatoms_to_molecule(
                resid.atoms, residue_name=resid.name, site_property=site_property)
            for site in mol:
                packed_mol.append(site.species_and_occu, site.coords,
                                  properties=site.properties)

        return packed_mol
Example #6
0
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    Gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel,
    many more extensions are supported but requires openbabel to be installed.

    Extension matching is case-insensitive and tolerates trailing suffixes
    (the patterns are of the form ``*.ext*``).

    Args:
        filename (str): A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a json/mson file does not decode to a Molecule.
        ValueError: if the extension is not recognized.
    """
    fname = os.path.basename(filename).lower()

    def has_ext(extensions):
        # True when the lower-cased base name matches "*.<ext>*" for any ext.
        return any(fnmatch(fname, "*.{}*".format(ext)) for ext in extensions)

    if has_ext(("xyz",)):
        return XYZ.from_file(filename).molecule
    if has_ext(("gjf", "g03", "g09", "com", "inp")):
        return GaussianInput.from_file(filename).molecule
    if has_ext(("out", "lis", "log")):
        return GaussianOutput(filename).final_structure
    if has_ext(("json", "mson")):
        with zopen(filename) as f:
            s = json.load(f, cls=MontyDecoder)
        # isinstance (rather than an exact type comparison) also accepts
        # Molecule subclasses.
        if not isinstance(s, Molecule):
            raise IOError("File does not contain a valid serialized " "molecule")
        return s

    # "mol2" must be listed before "mol": regex alternation takes the first
    # alternative that matches, so with "mol" first a ".mol2" file would be
    # handed to OpenBabel as format "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)", filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename, m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
Example #7
0
    def run(self, copy_to_current_on_exit=False, site_property=None):
        """
        Write the input file to the scratch directory, run packmol and return
        the packed molecule.

        Args:
            copy_to_current_on_exit (bool): Whether or not to copy the packmol
                input/output files from the scratch directory to the current
                directory.
            site_property (str): if set then the specified site property
                for the the final packed molecule will be restored.

        Returns:
                Molecule object, or None if packmol did not produce the
                expected output file.
        """
        scratch = tempfile.gettempdir()
        with ScratchDir(scratch, copy_to_current_on_exit=copy_to_current_on_exit) as scratch_dir:
            self._write_input(input_dir=scratch_dir)
            input_path = os.path.join(scratch_dir, self.input_file)
            # Feed the input file to packmol on stdin; the context manager
            # guarantees the handle is closed once the process has finished.
            with open(input_path, 'r') as packmol_input:
                p = Popen(self.packmol_bin, stdin=packmol_input, stdout=PIPE, stderr=PIPE)
                (stdout, stderr) = p.communicate()
            output_file = os.path.join(scratch_dir, self.control_params["output"])
            if os.path.isfile(output_file):
                packed_mol = BabelMolAdaptor.from_file(output_file,
                                                       self.control_params["filetype"])
                packed_mol = packed_mol.pymatgen_mol
                print("packed molecule written to {}".format(
                    self.control_params["output"]))
                if site_property:
                    packed_mol = self.restore_site_properties(site_property=site_property, filename=output_file)
                return packed_mol
            else:
                # A missing output file is treated as a failed packmol run.
                print("Packmol execution failed")
                print(stdout, stderr)
                return None
Example #8
0
    def run(self, copy_to_current_on_exit=False):
        """
        Write the input file to the scratch directory, run packmol and return
        the packed molecule.

        Args:
            copy_to_current_on_exit (bool): Whether or not to copy the packmol
                input/output files from the scratch directory to the current
                directory.

        Returns:
                Molecule object, or None if packmol did not produce the
                expected output file.
        """
        scratch = tempfile.gettempdir()
        with ScratchDir(scratch, copy_to_current_on_exit=copy_to_current_on_exit) as scratch_dir:
            self._write_input(input_dir=scratch_dir)
            packmol_bin = ['packmol']
            input_path = os.path.join(scratch_dir, self.input_file)
            # Feed the input file to packmol on stdin; the context manager
            # guarantees the handle is closed once the process has finished.
            with open(input_path, 'r') as packmol_input:
                p = Popen(packmol_bin, stdin=packmol_input, stdout=PIPE, stderr=PIPE)
                # communicate() both drains the pipes and waits for exit.
                # Calling wait() first, as before, can deadlock when the
                # child fills a pipe buffer that nobody is reading.
                (stdout, stderr) = p.communicate()
            output_file = os.path.join(scratch_dir, self.control_params["output"])
            if os.path.isfile(output_file):
                packed_mol = BabelMolAdaptor.from_file(output_file)
                print("packed molecule written to {}".format(
                    self.control_params["output"]))
                return packed_mol.pymatgen_mol
            else:
                # A missing output file is treated as a failed packmol run.
                print("Packmol execution failed")
                print(stdout, stderr)
                return None
Example #9
0
def open_ring(mol_graph, bond, opt_steps):
    """
    Open a ring by removing one bond and relaxing the result with OpenBabel.

    The molecule graph is converted into a BabelMolAdaptor, the bond given by
    ``bond[0]`` is removed, a local optimization is run for ``opt_steps``
    steps, and the resulting structure is converted back into a molecule
    graph whose connectivity is determined with OpenBabelNN.

    :param mol_graph: MoleculeGraph containing the ring to open.
    :param bond: sequence whose first element is the pair of site indices of
        the bond to remove.
    :param opt_steps: number of steps passed to the local optimization.
    :return: MoleculeGraph of the optimized, ring-opened structure.
    """
    adaptor = BabelMolAdaptor.from_molecule_graph(mol_graph)
    first_idx, second_idx = bond[0][0], bond[0][1]
    # Both indices are shifted by one for remove_bond (presumably because
    # OpenBabel numbers atoms from 1 - confirm against the adaptor).
    adaptor.remove_bond(first_idx + 1, second_idx + 1)
    adaptor.localopt(steps=opt_steps)
    relaxed_mol = adaptor.pymatgen_mol
    return MoleculeGraph.with_local_env_strategy(
        relaxed_mol,
        OpenBabelNN(),
        reorder=False,
        extend_structure=False,
    )
Example #10
0
    def write_pdb(self, mol, filename, name=None, num=None):
        """
        Dump the molecule into a pdb file, giving every residue the same
        custom name and number.

        Args:
            mol: molecule to write; it must provide ``to(fmt=..., filename=...)``.
            filename (str): path of the pdb file to write (overwritten if
                it exists).
            name (str): residue name. Defaults to "ml<num>".
            num (int): residue number. Defaults to 1.
        """

        # ugly hack to get around the openbabel issues with inconsistent
        # residue labelling: round-trip the molecule through a temporary pdb
        # file instead of wrapping it in a BabelMolAdaptor directly.
        with ScratchDir(tempfile.gettempdir(), copy_to_current_on_exit=False):
            mol.to(fmt="pdb", filename="tmp.pdb")
            adaptor = BabelMolAdaptor.from_file("tmp.pdb", "pdb")

        if not num:
            num = 1
        if not name:
            name = "ml{}".format(num)

        pybel_mol = pb.Molecule(adaptor._obmol)
        for residue in pybel_mol.residues:
            ob_residue = residue.OBResidue
            ob_residue.SetName(name)
            ob_residue.SetNum(num)

        pybel_mol.write(format="pdb", filename=filename, overwrite=True)
Example #11
0
 def add_hydrogen(self):
     # NOTE(review): this method has no ``test_`` prefix, so default unittest
     # discovery will not collect it - confirm whether that is intended.
     #
     # The SMILES string "CCCC" describes four heavy atoms; the hydrogens
     # only become explicit atoms after add_hydrogen() is called.
     mol_0d = pb.readstring("smi", "CCCC").OBMol
     self.assertEqual(len(pb.Molecule(mol_0d).atoms), 4)
     adaptor = BabelMolAdaptor(mol_0d)
     adaptor.add_hydrogen()
     # C4H10: 4 carbons + 10 hydrogens = 14 sites.
     self.assertEqual(len(adaptor.pymatgen_mol.sites), 14)
Example #12
0
    def fragment_and_process(self, bonds):
        """
        Fragment the principal molecule along ``bonds`` and record the
        resulting bond dissociation entries.

        The molecule graph is first split with ``split_molecule_subgraphs``.

        * If the split fails with MolGraphSplitError and a single bond was
          given, the bond is recorded in ``self.ring_bonds``, the ring is
          opened with ``open_ring`` and, unless an isomorphic opened fragment
          was already handled, matching fragment entries with the same charge
          as the principal molecule are looked up. Exactly one match is
          turned into a new entry; none is reported with ``print``.
        * If the split succeeds, the fragment pair is skipped when an
          isomorphic pair was already handled. Otherwise entries are searched
          for both fragments, missing expected charges are reported with
          ``print``, and every pair of entries whose charges add up to the
          principal molecule's charge is turned into a new entry.

        New entries are appended to ``self.bond_dissociation_energies``.

        :param bonds: Bonds to process. Indexed as ``bonds[0][0]`` and
            ``bonds[0][1]``, i.e. a list of index pairs.
        :return: None
        :raises RuntimeError: if the split fails for two bonds, or if more
            than one valid ring opening fragment entry is found.
        :raises ValueError: if the split fails for more than two bonds.
        """
        # Try to split the principal molecule:
        try:
            frags = self.mol_graph.split_molecule_subgraphs(bonds,
                                                            allow_reverse=True)
            frag_success = True
        except MolGraphSplitError:
            # If split is unsuccessful, then we have encountered a ring bond
            if len(bonds) == 1:
                self.ring_bonds += bonds
                # So we open the ring and make sure we haven't already encountered an identically opened fragment:
                RO_frag = open_ring(self.mol_graph, bonds, 1000)
                frag_done = False
                for done_RO_frag in self.done_RO_frags:
                    if RO_frag.isomorphic_to(done_RO_frag):
                        frag_done = True
                if not frag_done:
                    # If this is a new fragment, save the record and then search for relevant fragment entries:
                    self.done_RO_frags.append(RO_frag)
                    opened_entries = self.search_fragment_entries(RO_frag)
                    good_entries = []
                    # We will start by looking at entries with no structure change
                    for frag in opened_entries[0]:  # 0 -> no structural change
                        # Since a ring opening still yields a single molecule, it should have the same charge as the
                        # principal molecule:
                        if frag["initial_molecule"][
                                "charge"] == self.molecule_entry[
                                    "final_molecule"]["charge"]:
                            good_entries.append(frag)
                    # If we didn't find any good entries, let's also look at those that exhibit structural changes:
                    if len(good_entries) == 0:
                        for frag in opened_entries[
                                1]:  # 1 -> YES structural change
                            if frag["initial_molecule"][
                                    "charge"] == self.molecule_entry[
                                        "final_molecule"]["charge"]:
                                good_entries.append(frag)
                    # If we still have no good entries, something must have gone wrong with the calculations:
                    if len(good_entries) == 0:
                        # Build a SMILES string of the opened fragment purely for the diagnostic message.
                        bb = BabelMolAdaptor.from_molecule_graph(RO_frag)
                        pbmol = bb.pybel_mol
                        smiles = pbmol.write(str("smi")).split()[0]
                        specie = nx.get_node_attributes(
                            self.mol_graph.graph, "specie")
                        print(
                            "Missing ring opening fragment resulting from the breakage of "
                            + specie[bonds[0][0]] + " " + specie[bonds[0][1]] +
                            " bond " + str(bonds[0][0]) + " " +
                            str(bonds[0][1]) +
                            " which would yield a molecule with this SMILES string: "
                            + smiles)
                    elif len(good_entries) == 1:
                        # If we have only one good entry, format it and add it to the list that will eventually return:
                        self.bond_dissociation_energies += [
                            self.build_new_entry(good_entries, bonds)
                        ]
                    else:
                        # We shouldn't ever encounter more than one good entry.
                        raise RuntimeError(
                            "There should only be one valid ring opening fragment! Exiting..."
                        )
            elif len(bonds) == 2:
                raise RuntimeError(
                    "Should only be trying to break two bonds if multibreak is true! Exiting..."
                )
            else:
                print(
                    "No reason to try and break more than two bonds at once! Exiting..."
                )
                raise ValueError
            # Only reached on the single-bond (ring) path; the other branches raise.
            frag_success = False
        if frag_success:
            # If the principal molecule did successfully split, then we aren't dealing with a ring bond.
            # As above, we begin by making sure we haven't already encountered an identical pair of fragments:
            frags_done = False
            for frag_pair in self.done_frag_pairs:
                # A stored pair counts as identical in either order.
                if frag_pair[0].isomorphic_to(frags[0]):
                    if frag_pair[1].isomorphic_to(frags[1]):
                        frags_done = True
                        break
                elif frag_pair[1].isomorphic_to(frags[0]):
                    if frag_pair[0].isomorphic_to(frags[1]):
                        frags_done = True
                        break
            if not frags_done:
                # If we haven't, we save this pair and search for the relevant fragment entries:
                self.done_frag_pairs += [frags]
                num_entries_for_this_frag_pair = 0
                frag1_entries = self.search_fragment_entries(frags[0])
                frag2_entries = self.search_fragment_entries(frags[1])
                frag1_charges_found = []
                frag2_charges_found = []
                # We then check for our expected charges of each fragment:
                for frag1 in frag1_entries[0] + frag1_entries[1]:
                    if frag1["initial_molecule"][
                            "charge"] not in frag1_charges_found:
                        frag1_charges_found += [
                            frag1["initial_molecule"]["charge"]
                        ]
                for frag2 in frag2_entries[0] + frag2_entries[1]:
                    if frag2["initial_molecule"][
                            "charge"] not in frag2_charges_found:
                        frag2_charges_found += [
                            frag2["initial_molecule"]["charge"]
                        ]
                # If we're missing some of either, tell the user:
                if len(frag1_charges_found) < len(self.expected_charges):
                    bb = BabelMolAdaptor(frags[0].molecule)
                    pbmol = bb.pybel_mol
                    smiles = pbmol.write(str("smi")).split()[0]
                    for charge in self.expected_charges:
                        if charge not in frag1_charges_found:
                            print("Missing charge " + str(charge) +
                                  " for fragment " + smiles)
                if len(frag2_charges_found) < len(self.expected_charges):
                    bb = BabelMolAdaptor(frags[1].molecule)
                    pbmol = bb.pybel_mol
                    smiles = pbmol.write(str("smi")).split()[0]
                    for charge in self.expected_charges:
                        if charge not in frag2_charges_found:
                            print("Missing charge " + str(charge) +
                                  " for fragment " + smiles)
                # Now we attempt to pair fragments with the right total charge, starting with only fragments with no
                # structural change:
                for frag1 in frag1_entries[0]:  # 0 -> no structural change
                    for frag2 in frag2_entries[0]:  # 0 -> no structural change
                        if (frag1["initial_molecule"]["charge"] +
                                frag2["initial_molecule"]["charge"] == self.
                                molecule_entry["final_molecule"]["charge"]):
                            self.bond_dissociation_energies += [
                                self.build_new_entry([frag1, frag2], bonds)
                            ]
                            num_entries_for_this_frag_pair += 1
                # If we haven't found the number of fragment pairs that we expect, we expand our search to include
                # fragments that do exhibit structural change:
                if num_entries_for_this_frag_pair < len(self.expected_charges):
                    # Note: only mixed combinations (one changed, one unchanged) are tried below.
                    for frag1 in frag1_entries[0]:  # 0 -> no structural change
                        for frag2 in frag2_entries[
                                1]:  # 1 -> YES structural change
                            if (frag1["initial_molecule"]["charge"] +
                                    frag2["initial_molecule"]["charge"] ==
                                    self.molecule_entry["final_molecule"]
                                ["charge"]):
                                self.bond_dissociation_energies += [
                                    self.build_new_entry([frag1, frag2], bonds)
                                ]
                                num_entries_for_this_frag_pair += 1
                    for frag1 in frag1_entries[
                            1]:  # 1 -> YES structural change
                        for frag2 in frag2_entries[
                                0]:  # 0 -> no structural change
                            if (frag1["initial_molecule"]["charge"] +
                                    frag2["initial_molecule"]["charge"] ==
                                    self.molecule_entry["final_molecule"]
                                ["charge"]):
                                self.bond_dissociation_energies += [
                                    self.build_new_entry([frag1, frag2], bonds)
                                ]
                                num_entries_for_this_frag_pair += 1
Example #13
0
    def __init__(self, molecule, optimize=False):
        """
        Instantiation method for FunctionalGroupExtractor.

        :param molecule: Either a filename, a pymatgen.core.structure.Molecule
            object, or a pymatgen.analysis.graphs.MoleculeGraph object.
        :param optimize: Default False. If True, then the input molecule will be
            modified, adding Hydrogens, performing a simple conformer search,
            etc. When a MoleculeGraph is given together with optimize=True,
            its graph is not reused; a new graph is built from the optimized
            molecule.
        :raises ValueError: if ``molecule`` is a filename that cannot be read
            (OSError), or is none of the supported types.
        """

        def optimized(adaptor):
            # Shared optimization recipe: add hydrogens, build 3D
            # coordinates, run a local optimization, then convert the
            # adaptor back into a pymatgen Molecule.
            adaptor.add_hydrogen()
            adaptor.make3d()
            adaptor.localopt()
            return adaptor.pymatgen_mol

        self.molgraph = None

        if isinstance(molecule, str):
            try:
                if optimize:
                    # OBMolecule does not contain pymatgen Molecule information
                    # So, we need to wrap the obmol in a BabelMolAdapter
                    self.molecule = optimized(
                        BabelMolAdaptor.from_file(molecule, file_format="mol"))
                else:
                    self.molecule = Molecule.from_file(molecule)
            except OSError:
                raise ValueError("Input must be a valid molecule file, a "
                                 "Molecule object, or a MoleculeGraph object.")

        elif isinstance(molecule, Molecule):
            if optimize:
                self.molecule = optimized(BabelMolAdaptor(molecule))
            else:
                self.molecule = molecule

        elif isinstance(molecule, MoleculeGraph):
            if optimize:
                self.molecule = optimized(BabelMolAdaptor(molecule.molecule))
            else:
                self.molecule = molecule.molecule
                self.molgraph = molecule

        else:
            # The trailing space after "be" was missing, which rendered the
            # message as "...must bestr, Molecule, ...".
            raise ValueError("Input to FunctionalGroupExtractor must be "
                             "str, Molecule, or MoleculeGraph.")

        if self.molgraph is None:
            # No usable graph was supplied: derive the bonding from the
            # molecule with OpenBabelNN.
            self.molgraph = MoleculeGraph.with_local_env_strategy(self.molecule,
                                                                  OpenBabelNN(),
                                                                  reorder=False,
                                                                  extend_structure=False)

        # Assign a specie and coordinates to each node in the graph,
        # corresponding to the Site in the Molecule object
        self.molgraph.set_node_attributes()

        self.species = nx.get_node_attributes(self.molgraph.graph, "specie")
Example #14
0
def main():
    """
    Command-line entry point: place salt fragments around a molecule.

    Parses the command-line options, loads the molecule and the fragment
    files through BabelMolAdaptor (the file format is taken from each file's
    extension), builds the energy evaluator selected with ``--evaluator``,
    runs IonPlacer and prints a short summary.

    Raises:
        ValueError: if the number of fragment files differs from the number
            of fragment counts given with ``-n``.
    """
    def gcd(a, b):
        if b == 0:
            return a
        else:
            return gcd(b, a % b)

    def lcm(a, b):
        # Floor division keeps the result an integer; gcd(a, b) divides
        # a * b exactly. Only referenced by the commented-out code below.
        return a * b // gcd(a, b)

    import argparse
    parser = argparse.ArgumentParser(
        description="Place salt around a molecule")
    parser.add_argument("-m",
                        "--molecule",
                        dest="molecule",
                        type=str,
                        required=True,
                        help="the file name of molecule")
    parser.add_argument(
        "-l",
        "--ligand",
        dest="fragments",
        type=str,
        nargs='+',
        required=True,
        help=
        "the list of fragment file names to to be placed around the molecule")
    parser.add_argument(
        "-n",
        "--nums_fragments",
        dest="nums_fragments",
        type=int,
        nargs='+',
        required=True,
        help=
        "the number of each fragment, the order must be the same with FRAGMENTS"
    )
    parser.add_argument("-c",
                        "--charge",
                        dest="charge",
                        type=int,
                        required=True,
                        help="total charge of the system")
    parser.add_argument("-t",
                        "--taboo_tolerance",
                        dest="taboo_tolerance",
                        type=float,
                        default=1.0,
                        help="The radius to taboo a solution (in Angstrom)")
    parser.add_argument(
        "-r",
        "--ratio_taboo_particles",
        dest="ratio_taboo_particles",
        type=float,
        default=0.5,
        help=
        "ratio of particle within the tolerance to consider taboo current solution"
    )
    parser.add_argument("-o",
                        "--outputfile",
                        dest="outputfile",
                        type=str,
                        required=True,
                        help="the file name of the aligned conformer")
    parser.add_argument("-i",
                        "--iterations",
                        dest="iterations",
                        type=int,
                        default=600,
                        help="maximum number of evaluations")
    parser.add_argument("-s",
                        "--size",
                        dest="size",
                        type=int,
                        default=15,
                        help="population size")
    parser.add_argument("-k",
                        "--num_neighbours",
                        dest="num_neighbours",
                        type=int,
                        default=2,
                        help="number of neighbours")
    parser.add_argument(
        "--force_ordered_fragment",
        dest="force_ordered_fragment",
        action="store_true",
        help=
        "set this option to keep the fragment of the same in the order of input along the X-axis"
    )
    parser.add_argument("--topology",
                        dest="topology",
                        choices=["ring", "star"],
                        type=str,
                        default="ring",
                        help="the topology of the PSO information network")
    parser.add_argument("--initial_guess",
                        dest="initial_guess",
                        choices=["breadth", "center", "volume"],
                        default="breadth",
                        help="where should particles should be initially put")
    parser.add_argument("--bound_setter",
                        dest="bound_setter",
                        choices=["chain", "volume"],
                        default="chain",
                        help="method to set the bound conditions of PSO")
    parser.add_argument(
        "--always_write_best",
        dest="always_write_best",
        action="store_true",
        help=
        "enable this option to output the best structure at every iteration")
    parser.add_argument("--random_seed",
                        dest="random_seed",
                        default=None,
                        type=int,
                        help="random seed for PSO, an integer is expected")
    parser.add_argument("--max_generations_each_conformer",
                        dest="max_generations_each_conformer",
                        default=100,
                        type=int,
                        help="maximum generations for each conformer")
    parser.add_argument("-e",
                        "--evaluator",
                        dest="evaluator",
                        type=str,
                        default="hardsphere",
                        choices=["hardsphere", "sqm"],
                        help="Energy Evaluator")
    parser.add_argument("--solvent",
                        dest="solvent",
                        type=str,
                        default=None,
                        choices=["H2O"],
                        help="Solvent for ALPB model")
    options = parser.parse_args()

    # The file extension (without the leading dot) doubles as the format
    # name handed to BabelMolAdaptor.from_file.
    molecule = BabelMolAdaptor.from_file(
        options.molecule,
        os.path.splitext(options.molecule)[1][1:])._obmol
    fragments = []
    for frag_file in options.fragments:
        file_format = os.path.splitext(frag_file)[1][1:]
        fragments.append(
            BabelMolAdaptor.from_file(frag_file, file_format)._obmol)

    # Validate the fragment/count pairing before any evaluator is built, so
    # a mismatch is reported up front instead of after (or from inside) the
    # evaluator construction.
    if len(fragments) != len(options.nums_fragments):
        raise ValueError(
            "you must specify the duplicated count for every fragment")

    if options.evaluator == 'hardsphere':
        rad_util = AtomicRadiusUtils(covalent_radius_scale=2.0,
                                     metal_radius_scale=0.5)
        mol_radius = rad_util.get_radius(molecule)
        fragments_atom_radius = [
            rad_util.get_radius(frag) for frag in fragments
        ]
        # NOTE(review): ``mol_coords`` is not defined anywhere in this
        # function, so this branch still raises NameError as written. How the
        # coordinates should be derived from ``molecule`` is not visible
        # here - TODO define it before relying on the hardsphere evaluator.
        # (The fragment counts previously used the undefined name
        # ``nums_fragments``; they come from the parsed options.)
        energy_evaluator = HardSphereEnergyEvaluator(
            mol_coords,
            mol_radius,
            fragments_atom_radius,
            options.nums_fragments,
        )

        # qcout_molecule = QCOutput(options.molecule)
        # qcout_cation = QCOutput(options.cation)
        # qcout_anion = QCOutput(options.anion)
        # total_charge_cation = qcout_cation.data[0]["molecules"][-1].charge
        # total_charge_anion = qcout_anion.data[0]["molecules"][-1].charge
        # total_charge_mol = qcout_molecule.data[0]["molecules"][-1].charge
        # num_lcm = lcm(total_charge_cation, -total_charge_anion)
        # num_cation = num_lcm / total_charge_cation
        # num_anion = num_lcm / -total_charge_anion
        # pymatgen_mol_molecule = qcout_molecule.data[0]["molecules"][-1]
        # pymatgen_mol_cation = qcout_cation.data[0]["molecules"][-1]
        # pymatgen_mol_anion = qcout_anion.data[0]["molecules"][-1]
        #
        # molecule = BabelMolAdaptor(pymatgen_mol_molecule)._obmol
        #
        # obmol_cation = BabelMolAdaptor(pymatgen_mol_cation)._obmol
        #
        # obmol_anion = BabelMolAdaptor(pymatgen_mol_anion)._obmol
        # energy_evaluator = HardSphereElectrostaticEnergyEvaluator.from_qchem_output(
        #     qcout_molecule, qcout_cation, qcout_anion)
        # fragments = [obmol_cation, obmol_anion]
    else:
        energy_evaluator = SemiEmpricalQuatumMechanicalEnergyEvaluator(
            molecule,
            fragments,
            options.nums_fragments,
            total_charge=options.charge,
            taboo_tolerance_ang=options.taboo_tolerance,
            force_order_fragment=options.force_ordered_fragment,
            bound_setter=options.bound_setter,
            solvent=options.solvent)
    placer = IonPlacer(
        molecule=molecule,
        fragments=fragments,
        nums_fragments=options.nums_fragments,
        energy_evaluator=energy_evaluator,
        taboo_tolerance_ang=options.taboo_tolerance,
        taboo_tolerance_particle_ratio=options.ratio_taboo_particles,
        topology=options.topology,
        initial_guess=options.initial_guess,
        bound_setter=options.bound_setter,
        always_write_best=options.always_write_best,
        random_seed=options.random_seed,
        max_generations_each_conformer=options.max_generations_each_conformer,
        output_file=options.outputfile)
    # The evaluator is given a back-reference to the placer it serves.
    energy_evaluator.arranger = placer
    placer.place(max_evaluations=options.iterations,
                 pop_size=options.size,
                 neighborhood_size=options.num_neighbours)
    print('It took {:.1f} seconds to place the salt'.format(
        placer.playing_time))
    print(f'xtb was called {energy_evaluator.run_number} times.')
    print(f'global best energy: {energy_evaluator.global_best_energy} Ha.')
Example #15
0
    def uniform_labels(self, mol1, mol2):
        """
        Pair the geometrically equivalent atoms of the molecules.
        Every isomorphism mapping found by OpenBabel is aligned against the
        first molecule and the mapping with the least RMSD is returned.

        Args:
            mol1: First molecule. OpenBabel OBMol or pymatgen Molecule object.
            mol2: Second molecule. OpenBabel OBMol or pymatgen Molecule object.

        Returns:
            (list1, list2) if uniform atom order is found. list1 and list2
            are for mol1 and mol2, respectively. Their length equal
            to the number of atoms. They represents the uniform atom order
            of the two molecules. The value of each element is the original
            atom index in mol1 or mol2 of the current atom in uniform atom
            order.
            (None, None) if the molecule hashes of the two molecules differ.
            (list1, None) if the hashes agree but no mapping has a matching
            element sequence.
        """
        ob_first = BabelMolAdaptor(mol1).openbabel_mol
        ob_second = BabelMolAdaptor(mol2).openbabel_mol

        # Different hashes: no uniform order is attempted.
        hash_first = self.get_molecule_hash(ob_first)
        hash_second = self.get_molecule_hash(ob_second)
        if hash_first != hash_second:
            return None, None

        # Collect every isomorphism of the second molecule onto the first.
        mapper = ob.OBIsomorphismMapper.GetInstance(
            ob.CompileMoleculeQuery(ob_first))
        mappings = ob.vvpairUIntUInt()
        mapper.MapAll(ob_second, mappings)

        # Per mapping: order the pairs by their first member and keep the
        # second member, shifted by one to match the 1-based labels below.
        candidates = tuple(
            tuple(pair[1] + 1
                  for pair in sorted(mapping, key=lambda item: item[0]))
            for mapping in mappings)

        aligner = ob.OBAlign(True, False)
        aligner.SetRefMol(ob_first)
        label1 = list(range(1, ob_first.NumAtoms() + 1))
        # noinspection PyProtectedMember
        elements1 = InchiMolAtomMapper._get_elements(ob_first, label1)

        best_label2 = None
        least_rmsd = float("Inf")
        for candidate in candidates:
            # noinspection PyProtectedMember
            elements2 = InchiMolAtomMapper._get_elements(ob_second, candidate)
            if elements1 != elements2:
                continue
            # Rebuild the second molecule with its atoms in candidate order
            # so it can be aligned atom-by-atom against the reference.
            reordered = ob.OBMol()
            for atom_idx in candidate:
                reordered.AddAtom(ob_second.GetAtom(atom_idx))
            aligner.SetTargetMol(reordered)
            aligner.Align()
            rmsd = aligner.GetRMSD()
            if rmsd < least_rmsd:
                least_rmsd, best_label2 = rmsd, copy.copy(candidate)
        return label1, best_label2
Example #16
0
    def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
        """
        Build the summary document for the Q-Chem calculation(s) in a directory.

        Args:
            dir_name: Directory containing the calculation files.
            qcinput_files: Mapping of task name to input filename. When it has
                more entries than ``qcoutput_files``, its "orig" entry is
                popped and parsed into ``d["orig"]``.
            qcoutput_files: Mapping of task name to output filename.
            multirun: If truthy, the files are handed to
                ``process_qchem_multirun``; otherwise every output file is
                processed individually with ``process_qchemrun``.

        Returns:
            dict: The assembled document, including the per-calculation
            entries under "calcs_reversed" (newest first), summarised
            "input"/"output" sections, warnings, formula fields, SMILES,
            state and a "last_updated" timestamp.

        Raises:
            Exception: Any error raised while assembling the document is
                logged together with its traceback and then re-raised.
        """
        opt_job_types = ("opt", "optimization")
        freq_job_types = ("freq", "frequency")

        try:
            fullpath = os.path.abspath(dir_name)
            d = jsanitize(self.additional_fields, strict=True)
            d["schema"] = {
                "code": "atomate",
                "version": QChemDrone.__version__
            }
            d["dir_name"] = fullpath

            # If a saved "orig" input file is present, parse it in case the
            # error handler made changes to the initial input molecule or rem
            # params, which we might want to filter for later
            if len(qcinput_files) > len(qcoutput_files):
                orig_input = QCInput.from_file(
                    os.path.join(dir_name, qcinput_files.pop("orig")))
                d["orig"] = {}
                d["orig"]["molecule"] = orig_input.molecule.as_dict()
                d["orig"]["molecule"]["charge"] = int(
                    d["orig"]["molecule"]["charge"])
                d["orig"]["rem"] = orig_input.rem
                d["orig"]["opt"] = orig_input.opt
                d["orig"]["pcm"] = orig_input.pcm
                d["orig"]["solvent"] = orig_input.solvent
                d["orig"]["smx"] = orig_input.smx

            if multirun:
                d["calcs_reversed"] = self.process_qchem_multirun(
                    dir_name, qcinput_files, qcoutput_files)
            else:
                d["calcs_reversed"] = [
                    self.process_qchemrun(dir_name, taskname,
                                          qcinput_files.get(taskname),
                                          output_filename)
                    for taskname, output_filename in qcoutput_files.items()
                ]

            # reverse the calculations data order so newest calc is first
            d["calcs_reversed"].reverse()

            # Collect every structure change and flag each warning seen in
            # any of the calculations.
            d["structure_change"] = []
            d["warnings"] = {}
            for entry in d["calcs_reversed"]:
                if "structure_change" in entry and "structure_change" not in d["warnings"]:
                    if entry["structure_change"] != "no_change":
                        d["warnings"]["structure_change"] = True
                if "structure_change" in entry:
                    d["structure_change"].append(entry["structure_change"])
                for key in entry["warnings"]:
                    if key not in d["warnings"]:
                        d["warnings"][key] = True

            d_calc_init = d["calcs_reversed"][-1]
            d_calc_final = d["calcs_reversed"][0]

            d["input"] = {
                "initial_molecule": d_calc_init["initial_molecule"],
                "job_type": d_calc_init["input"]["rem"]["job_type"]
            }
            d["output"] = {
                "initial_molecule": d_calc_final["initial_molecule"],
                "job_type": d_calc_final["input"]["rem"]["job_type"],
                "mulliken": d_calc_final["Mulliken"][-1]
            }
            if "RESP" in d_calc_final:
                d["output"]["resp"] = d_calc_final["RESP"][-1]
            elif "ESP" in d_calc_final:
                d["output"]["esp"] = d_calc_final["ESP"][-1]

            if d["output"]["job_type"] in opt_job_types:
                if "molecule_from_optimized_geometry" in d_calc_final:
                    d["output"]["optimized_molecule"] = d_calc_final[
                        "molecule_from_optimized_geometry"]
                    d["output"]["final_energy"] = d_calc_final["final_energy"]
                else:
                    d["output"]["final_energy"] = "unstable"
                if d_calc_final["opt_constraint"]:
                    d["output"]["constraint"] = [
                        d_calc_final["opt_constraint"][0],
                        float(d_calc_final["opt_constraint"][6])
                    ]
            if d["output"]["job_type"] in freq_job_types:
                d["output"]["frequencies"] = d_calc_final["frequencies"]
                d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
                d["output"]["entropy"] = d_calc_final["total_entropy"]
                if d["input"]["job_type"] in opt_job_types:
                    # The final frequency job started from the optimized
                    # geometry; its energy is taken from the calculation
                    # that ran just before it.
                    d["output"]["optimized_molecule"] = d_calc_final[
                        "initial_molecule"]
                    d["output"]["final_energy"] = d["calcs_reversed"][1][
                        "final_energy"]

            if "final_energy" not in d["output"]:
                if d_calc_final["final_energy"] is not None:
                    d["output"]["final_energy"] = d_calc_final["final_energy"]
                else:
                    # Fall back on the last value of the last SCF cycle.
                    d["output"]["final_energy"] = d_calc_final["SCF"][-1][-1][0]

            # Timings are only summed when the final calculation completed.
            if d_calc_final["completion"]:
                total_cputime = 0.0
                total_walltime = 0.0
                for calc in d["calcs_reversed"]:
                    if calc["walltime"] is not None:
                        total_walltime += calc["walltime"]
                    if calc["cputime"] is not None:
                        total_cputime += calc["cputime"]
                d["walltime"] = total_walltime
                d["cputime"] = total_cputime
            else:
                d["walltime"] = None
                d["cputime"] = None

            comp = d["output"]["initial_molecule"].composition
            d["formula_pretty"] = comp.reduced_formula
            d["formula_anonymous"] = comp.anonymized_formula
            d["formula_alphabetical"] = comp.alphabetical_formula
            d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
            if d_calc_final["point_group"] is not None:
                d["pointgroup"] = d_calc_final["point_group"]
            else:
                try:
                    d["pointgroup"] = PointGroupAnalyzer(
                        d["output"]["initial_molecule"]).sch_symbol
                except ValueError:
                    d["pointgroup"] = "PGA_error"

            bb = BabelMolAdaptor(d["output"]["initial_molecule"])
            pbmol = bb.pybel_mol
            smiles = pbmol.write("smi").split()[0]
            d["smiles"] = smiles

            d["state"] = "successful" if d_calc_final["completion"] else "unsuccessful"
            if d.get("special_run_type") == "frequency_flattener":
                # Gather one record per optimization, oldest first.
                opt_traj = []
                for entry in d["calcs_reversed"]:
                    if entry["input"]["rem"]["job_type"] in opt_job_types:
                        doc = {"initial": {}, "final": {}}
                        doc["initial"]["molecule"] = entry["initial_molecule"]
                        doc["final"]["molecule"] = entry["molecule_from_last_geometry"]
                        doc["initial"]["total_energy"] = entry["energy_trajectory"][0]
                        doc["final"]["total_energy"] = entry["energy_trajectory"][-1]
                        doc["initial"]["scf_energy"] = entry["SCF"][0][-1][0]
                        doc["final"]["scf_energy"] = entry["SCF"][-1][-1][0]
                        doc["structure_change"] = entry["structure_change"]
                        opt_traj.append(doc)
                opt_traj.reverse()
                opt_trajectory = {
                    "trajectory": opt_traj,
                    "structure_change": [
                        [ii, entry["structure_change"]]
                        for ii, entry in enumerate(opt_traj)
                    ],
                    "energy_increase": []
                }
                for ii, entry in enumerate(opt_traj):
                    # Energy rose within a single optimization.
                    if entry["final"]["total_energy"] > entry["initial"]["total_energy"]:
                        opt_trajectory["energy_increase"].append(
                            [ii, entry["final"]["total_energy"] - entry["initial"]["total_energy"]])
                    if ii != 0:
                        previous = opt_traj[ii - 1]
                        # Energy rose between consecutive optimizations.
                        if entry["final"]["total_energy"] > previous["final"]["total_energy"]:
                            opt_trajectory["energy_increase"].append(
                                [ii - 1, ii,
                                 entry["final"]["total_energy"] - previous["final"]["total_energy"]])
                        struct_change = check_for_structure_changes(
                            previous["final"]["molecule"],
                            entry["final"]["molecule"])
                        if struct_change != entry["structure_change"]:
                            opt_trajectory["structure_change"].append([ii - 1, ii, struct_change])
                            d["warnings"]["between_iteration_structure_change"] = True
                if "linked" in d and d["linked"] == True:
                    # For linked runs each optimization should start exactly
                    # where the previous one ended; record where it does not.
                    opt_trajectory["discontinuity"] = {"structure": [], "scf_energy": [], "total_energy": []}
                    for ii, entry in enumerate(opt_traj):
                        if ii != 0:
                            previous = opt_traj[ii - 1]
                            if entry["initial"]["molecule"] != previous["final"]["molecule"]:
                                opt_trajectory["discontinuity"]["structure"].append([ii - 1, ii])
                                d["warnings"]["linked_structure_discontinuity"] = True
                            if entry["initial"]["total_energy"] != previous["final"]["total_energy"]:
                                opt_trajectory["discontinuity"]["total_energy"].append([ii - 1, ii])
                            if entry["initial"]["scf_energy"] != previous["final"]["scf_energy"]:
                                opt_trajectory["discontinuity"]["scf_energy"].append([ii - 1, ii])
                d["opt_trajectory"] = opt_trajectory

                if d["state"] == "successful":
                    orig_num_neg_freq = sum(
                        1 for freq in d["calcs_reversed"][-2]["frequencies"] if freq < 0)
                    orig_energy = d_calc_init["final_energy"]
                    final_num_neg_freq = sum(
                        1 for freq in d_calc_final["frequencies"] if freq < 0)
                    final_energy = d["calcs_reversed"][1]["final_energy"]
                    d["num_frequencies_flattened"] = orig_num_neg_freq - final_num_neg_freq
                    if final_num_neg_freq > 0:  # If a negative frequency remains,
                        # and it's too large to ignore,
                        if final_num_neg_freq > 1 or abs(d["output"]["frequencies"][0]) >= 15.0:
                            d["state"] = "unsuccessful"  # then the flattening was unsuccessful
                    if final_energy > orig_energy:
                        d["warnings"]["energy_increased"] = True

            d["last_updated"] = datetime.datetime.utcnow()
            return d

        except Exception:
            logger.error(traceback.format_exc())
            logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                         traceback.format_exc())
            raise
Example #17
0
    def get_molecule_data(self, mol_id):
        """
        Gather the analysis-relevant data for a single molecule: size (number
        of atoms), molecular weight, TPSA, enthalpy, entropy, energy,
        functional groups, double/triple bond counts and species counts.

        NOTE: Energy, enthalpy and entropy are rescaled on the way in so that
        they are reported in SI units (J/mol and J/mol*K).

        :param mol_id: Unique ID associated with the molecule.
        :return: dict of relevant molecule data.
        """

        if self.db is None:
            raise RuntimeError("Cannot query database; connection is invalid."
                               " Try to connect again.")

        summary = {"mol_id": mol_id}

        record = self.db.db["molecules"].find_one({"mol_id": mol_id})

        # Pull thermodynamics, single-point energy and optimized geometry out
        # of whichever calculations carry them.
        for calc in record["calcs_reversed"]:
            task_name = calc["task"]["name"]
            if task_name in ("freq", "frequency"):
                summary["enthalpy"] = calc["enthalpy"] * 4.184 * 1000
                summary["entropy"] = calc["entropy"] * 4.184
            if task_name == "sp":
                summary["energy"] = (
                    calc["final_energy_sp"] * 627.509 * 4.184 * 1000)
            if task_name in ("opt", "optimization"):
                summary["molecule"] = Molecule.from_dict(
                    calc["molecule_from_optimized_geometry"])

        molecule = summary["molecule"]
        pybel_molecule = BabelMolAdaptor(molecule).pybel_mol

        summary["number_atoms"] = len(molecule)
        summary["molecular_weight"] = pybel_molecule.molwt
        summary["tpsa"] = pybel_molecule.calcdesc()["TPSA"]

        extractor = FunctionalGroupExtractor(molecule)
        molgraph = extractor.molgraph
        groups = extractor.get_all_functional_groups()
        summary["functional_groups"] = (
            extractor.categorize_functional_groups(groups))

        # Count each double/triple bond once, no matter how many multigraph
        # edges connect the same ordered pair of nodes.
        seen_pairs = set()
        n_double = 0
        n_triple = 0
        edge_weights = nx.get_edge_attributes(molgraph.graph, "weight")
        for edge, weight in edge_weights.items():
            # Drop the multidigraph edge key, keeping only the two endpoints
            pair = (edge[0], edge[1])
            order = int(weight)
            if pair not in seen_pairs:
                if order == 2:
                    n_double += 1
                elif order == 3:
                    n_triple += 1
            seen_pairs.add(pair)

        summary["double_bonds"] = n_double
        summary["triple_bonds"] = n_triple

        summary["species"] = dict(
            Counter(str(site.specie) for site in molecule.sites))

        return summary
Example #18
0
 def test_from_file_return_all_molecules(self):
     """from_file with return_all_molecules=True yields one adaptor per frame."""
     xyz_path = os.path.join(test_dir, "multiple_frame_xyz.xyz")
     all_frames = BabelMolAdaptor.from_file(
         xyz_path, "xyz", return_all_molecules=True)
     self.assertEqual(len(all_frames), 302)
Example #19
0
    def __init__(self, molecule, optimize=False):
        """
        Instantiation method for FunctionalGroupExtractor.

        :param molecule: Either a filename, a pymatgen.core.structure.Molecule
            object, or a pymatgen.analysis.graphs.MoleculeGraph object.
        :param optimize: Default False. If True, then the input molecule will be
            modified, adding Hydrogens, performing a simple conformer search,
            etc.
        :raises ValueError: If ``molecule`` is a filename that cannot be read,
            or is none of the supported input types.
        """

        def _optimized(adaptor):
            # Add hydrogens, generate 3D coordinates and locally optimize the
            # wrapped molecule, then hand back the pymatgen Molecule.
            adaptor.add_hydrogen()
            adaptor.make3d()
            adaptor.localopt()
            return adaptor.pymatgen_mol

        self.molgraph = None

        if isinstance(molecule, str):
            try:
                if optimize:
                    # OBMolecule does not contain pymatgen Molecule information
                    # So, we need to wrap the obmol in a BabelMolAdapter
                    self.molecule = _optimized(
                        BabelMolAdaptor.from_file(molecule,
                                                  file_format="mol"))
                else:
                    self.molecule = Molecule.from_file(molecule)
            except OSError as err:
                raise ValueError("Input must be a valid molecule file, a "
                                 "Molecule object, or a MoleculeGraph object."
                                 ) from err

        elif isinstance(molecule, Molecule):
            if optimize:
                self.molecule = _optimized(BabelMolAdaptor(molecule))
            else:
                self.molecule = molecule

        elif isinstance(molecule, MoleculeGraph):
            if optimize:
                # The optimized molecule no longer matches the supplied
                # graph, so molgraph stays None and is rebuilt below.
                self.molecule = _optimized(BabelMolAdaptor(molecule.molecule))
            else:
                self.molecule = molecule.molecule
                self.molgraph = molecule

        else:
            raise ValueError("Input to FunctionalGroupExtractor must be "
                             "str, Molecule, or MoleculeGraph.")

        if self.molgraph is None:
            self.molgraph = MoleculeGraph.with_local_env_strategy(
                self.molecule,
                OpenBabelNN(),
                reorder=False,
                extend_structure=False)

        # Assign a specie and coordinates to each node in the graph,
        # corresponding to the Site in the Molecule object
        self.molgraph.set_node_attributes()

        self.species = nx.get_node_attributes(self.molgraph.graph, "specie")
Example #20
0
 def test_make3d(self):
     """make3d() must leave the wrapped OBMol reporting three dimensions."""
     butane = pb.readstring("smi", "CCCC").OBMol
     wrapper = BabelMolAdaptor(butane)
     wrapper.make3d()
     self.assertEqual(butane.GetDimension(), 3)
Example #21
0
 def test_make3d(self):
     """A molecule read from SMILES is three-dimensional after make3d()."""
     smiles_obmol = pb.readstring("smi", "CCCC").OBMol
     babel_adaptor = BabelMolAdaptor(smiles_obmol)
     babel_adaptor.make3d()
     dimension = smiles_obmol.GetDimension()
     self.assertEqual(dimension, 3)
Example #22
0
    def generate_doc(self, path, molecule_file, template_file, output_file,
                     isomers_file, internal_coordinate_file,
                     optimized_geom_file):
        """
        Build the summary document for a growing-string (GSM) calculation.

        Args:
            path: Calculation directory; its absolute form is stored as
                "dir_name".
            molecule_file: Input geometry file, read with ``parse_multi_xyz``.
                One parsed molecule is stored as the initial reactants; two
                are stored as initial reactants and initial products.
            template_file: Template file, read with ``QCTemplate.from_file``.
            output_file: GSM output file, parsed with ``GSMOutput``.
            isomers_file: Isomers file, or None. Required when the GSM mode
                reported in the output contains "SE".
            internal_coordinate_file: Internal-coordinate file, or None.
            optimized_geom_file: Optimized string file, parsed with
                ``GSMOptimizedStringParser``.

        Returns:
            dict: The assembled document with "input", "output", "calc",
            "warnings", "errors", formula fields, point groups, SMILES,
            state and a "last_updated" timestamp.

        Raises:
            ValueError: If the mode contains "SE" but no isomers file is given.
            Exception: Any other error is logged with its traceback and
                re-raised.
        """

        def _point_group(mol):
            # Schoenflies symbol of mol; None when there is no molecule and
            # "PGA_error" when the analyzer rejects it.
            if mol is None:
                return None
            try:
                return PointGroupAnalyzer(mol).sch_symbol
            except ValueError:
                return "PGA_error"

        try:
            fullpath = os.path.abspath(path)

            d = jsanitize(self.additional_fields, strict=True)

            d["schema"] = {"code": "atomate", "version": GSMDrone.__version__}

            d["dir_name"] = fullpath

            # TODO: Consider error handlers
            # Include an "orig" section to the doc

            # Parse all relevant files
            initial_mol = parse_multi_xyz(molecule_file)
            temp_file = QCTemplate.from_file(template_file)
            if isomers_file is not None:
                iso_file = GSMIsomerInput.from_file(isomers_file)
            out_file = GSMOutput(output_file)
            if internal_coordinate_file is not None:
                ic_file = GSMInternalCoordinateDataParser(
                    internal_coordinate_file)
            opt_file = GSMOptimizedStringParser(optimized_geom_file)

            # Placeholder; replaced by the parsed warnings further down.
            d["warnings"] = {}

            # INPUTS
            d["input"] = {}
            d["input"]["initial_reactants"] = None
            d["input"]["initial_products"] = None

            if len(initial_mol) == 1:
                d["input"]["initial_reactants"] = initial_mol[0]
            elif len(initial_mol) == 2:
                d["input"]["initial_reactants"] = initial_mol[0]
                d["input"]["initial_products"] = initial_mol[1]

            d["input"]["mode"] = out_file.data["inputs"]["gsm_type"]
            single_ended = "SE" in d["input"]["mode"]

            # When the output does not report a node count, fall back on 30
            # for single-ended modes and 9 otherwise.
            num_nodes = out_file.data["inputs"].get("num_nodes")
            if num_nodes is None:
                d["input"]["num_nodes"] = 30 if single_ended else 9
            else:
                d["input"]["num_nodes"] = int(num_nodes)

            d["input"]["reactants_fixed"] = out_file.data["inputs"].get(
                "reactant_geom_fixed", False)
            d["input"]["products_fixed"] = out_file.data["inputs"].get(
                "product_geom_fixed", False)

            d["input"]["template"] = {
                "rem": temp_file.rem,
                "pcm": temp_file.pcm,
                "solvent": temp_file.solvent,
                "smx": temp_file.smx
            }

            if single_ended:
                if isomers_file is None:
                    raise ValueError(
                        "No isomers file provided for single-ended calculation."
                    )
                d["input"]["isomers"] = {
                    "bonds_formed": iso_file.bonds_formed,
                    "bonds_broken": iso_file.bonds_broken,
                    "angles": iso_file.angles,
                    "torsions": iso_file.torsions,
                    "out_of_planes": iso_file.out_of_planes
                }

            d["input"]["parameters"] = out_file.data["inputs"]

            # OUTPUTS
            d["output"] = {}

            d["output"]["completion"] = out_file.data["completion"]

            if single_ended:
                d["output"]["initial_energy"] = out_file.data.get(
                    "initial_energy", None)
                d["driving_coord_trajectories"] = out_file.data.get(
                    "driving_coord_trajectories", None)
            else:
                d["output"]["initial_energy_rct"] = out_file.data.get(
                    "initial_energy_rct", None)
                d["output"]["initial_energy_pro"] = out_file.data.get(
                    "initial_energy_pro", None)

            d["output"]["energy_profile"] = out_file.data.get(
                "final_energy_profile", None)
            # NOTE(review): "path_uphill" reads the same "final_energy_profile"
            # key as "energy_profile" above; this looks like a copy-paste
            # slip. Confirm the intended GSMOutput key before changing it.
            d["output"]["path_uphill"] = out_file.data.get(
                "final_energy_profile", None)
            d["output"]["path_dissociative"] = out_file.data.get(
                "final_path_dissociative", None)
            d["output"]["minima_nodes"] = out_file.data.get(
                "final_min_nodes", None)
            d["output"]["maxima_nodes"] = out_file.data.get(
                "final_max_nodes", None)
            d["output"]["maximum_node"] = out_file.data.get(
                "final_max_node", None)
            d["output"]["maximum_energy"] = out_file.data.get(
                "final_max_energy", None)

            if d["output"]["completion"]:
                d["output"]["reactant_node"] = out_file.data["min_rct_node"]
                d["output"]["product_node"] = out_file.data["min_pro_node"]
                d["output"]["ts_node"] = out_file.data["ts_node"]
                d["output"]["absolute_ts_energy"] = out_file.data[
                    "absolute_ts_energy"]
                d["output"]["ts_energy"] = out_file.data["ts_energy"]
                d["output"]["delta_e"] = out_file.data["delta_e"]

                if internal_coordinate_file is not None:
                    d["output"]["internal_coords"] = ic_file.data
                else:
                    d["output"]["internal_coords"] = None
                d["output"]["species"] = opt_file.data["species"]
                d["output"]["optimized_node_geometries"] = opt_file.data[
                    "geometries"]
                d["output"]["optimized_node_molecules"] = opt_file.data[
                    "molecules"]
                d["output"]["optimized_node_energies"] = opt_file.data[
                    "energies"]
                d["output"]["optimized_node_forces"] = opt_file.data["forces"]

                # Look up the molecule at each special node, when one was
                # identified.
                for label in ("ts", "reactant", "product"):
                    node = d["output"][label + "_node"]
                    if node is not None:
                        d["output"][label + "_molecule"] = d["output"][
                            "optimized_node_molecules"][node]
                    else:
                        d["output"][label + "_molecule"] = None
            else:
                d["output"]["reactant_node"] = None
                d["output"]["product_node"] = None
                d["output"]["ts_node"] = None
                d["output"]["ts_energy"] = None
                d["output"]["absolute_ts_energy"] = None
                d["output"]["delta_e"] = None

                d["output"]["internal_coords"] = None
                d["output"]["species"] = None
                d["output"]["optimized_node_geometries"] = None
                d["output"]["optimized_node_molecules"] = None
                d["output"]["optimized_node_energies"] = None
                d["output"]["optimized_node_forces"] = None
                d["output"]["ts_molecule"] = None
                d["output"]["reactant_molecule"] = None
                d["output"]["product_molecule"] = None

            d["calc"] = out_file.data

            d["warnings"] = out_file.data["warnings"]
            d["errors"] = out_file.data["errors"]

            # TODO: walltime/cputime are not aggregated for GSM runs yet.

            comp = d["input"]["initial_reactants"].composition
            d["formula_pretty"] = comp.reduced_formula
            d["formula_anonymous"] = comp.anonymized_formula
            d["formula_alphabetical"] = comp.alphabetical_formula

            # Strip the digits from each formula component to obtain the
            # element symbols.
            elements = [
                "".join(ch for ch in component if not ch.isdigit())
                for component in d["formula_alphabetical"].split(" ")
            ]
            d["chemsys"] = "-".join(sorted(set(elements)))

            d["pointgroup_ts"] = _point_group(d["output"]["ts_molecule"])
            d["pointgroup_reactant"] = _point_group(
                d["output"]["reactant_molecule"])
            d["pointgroup_product"] = _point_group(
                d["output"]["product_molecule"])

            if d["output"]["ts_molecule"] is not None:
                bb = BabelMolAdaptor(d["output"]["ts_molecule"])
                pbmol = bb.pybel_mol
                smiles = pbmol.write("smi").split()[0]
                d["smiles"] = smiles
            else:
                d["smiles"] = None

            d["state"] = "successful" if d["output"][
                "completion"] else "unsuccessful"

            d["last_updated"] = datetime.datetime.utcnow()
            return d

        except Exception:
            logger.error(traceback.format_exc())
            logger.error("Error in " + os.path.abspath(path) + ".\n" +
                         traceback.format_exc())
            raise
Example #23
0
        dirname = filename[:-4]
        if os.path.exists(dirname):
            print("directory " + dirname + " already exists")
            print("please delete it before use this script")
            exit(0)
    for filename in filenames:
        dirname = filename[:-4]
        os.mkdir(dirname)
        print("reading", filename)
        text = None
        with open(filename) as f:
            text = f.read()
        mol_tokens = str_to_obmols(text)
        for (i, (mol, cas)) in enumerate(mol_tokens):
            print("processing molecule", i + 1, cas, "of", len(mol_tokens),
                  "molecules")
            try:
                build3d(mol)
            except:
                os.system("echo " + cas + " >> failed_mols.txt")
        pmg_mols = [(BabelMolAdaptor(obmol).pymatgen_mol, cas)
                    for (obmol, cas) in mol_tokens]
        snl_texts = [
            StructureNL(mol, "Xiaohui Qu <*****@*****.**>", remarks=cas)
            for (mol, cas) in pmg_mols
        ]
        for snl in snl_texts:
            with open(dirname + "/" + snl.remarks[0] + ".snl", 'w') as f:
                json.dump(snl.as_dict(), f, indent=4)
    print("Done")
Example #24
0
 def get_inchi(mol):
     """
     Write a molecule out in "inchi" format via pybel.

     :param mol: Molecule handed to BabelMolAdaptor.
     :return: The written string with surrounding whitespace stripped.
     """
     pybel_molecule = BabelMolAdaptor(mol).pybel_mol
     inchi_text = pybel_molecule.write("inchi")
     return inchi_text.strip()
Example #25
0
 def test_from_file(self):
     """Reading Ethane_e.pdb through from_file yields an H6 C2 molecule."""
     pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
     ethane = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
     self.assertEqual(ethane.formula, "H6 C2")
Example #26
0
 def test_from_file(self):
     """The ethane PDB test file loads as a molecule of formula H6 C2."""
     pdb_path = os.path.join(PymatgenTest.TEST_FILES_DIR,
                             "molecules/Ethane_e.pdb")
     loaded = BabelMolAdaptor.from_file(pdb_path, "pdb")
     self.assertEqual(loaded.pymatgen_mol.formula, "H6 C2")
Example #27
0
 def test_from_string(self):
     """An XYZ string of self.mol parses back to a molecule of formula H4 C1."""
     xyz_text = str(XYZ(self.mol))
     parsed = BabelMolAdaptor.from_string(xyz_text, "xyz").pymatgen_mol
     self.assertEqual(parsed.formula, "H4 C1")
Example #28
0
 def test_from_string(self):
     """from_string on the XYZ text of self.mol preserves the formula."""
     xyz_writer = XYZ(self.mol)
     babel_adaptor = BabelMolAdaptor.from_string(str(xyz_writer), "xyz")
     round_tripped = babel_adaptor.pymatgen_mol
     self.assertEqual(round_tripped.formula, "H4 C1")
Example #29
0
coords_list = []
for line in f:
    line_splitted = line.split()
    species_list.append(line_splitted[0])
    line_splitted.pop(0)
    line_splitted_float = []
    for coord in line_splitted:
        coord = float(coord)
        line_splitted_float.append(coord)
    coords_list.append(line_splitted_float)
mol = Molecule(species_list, coords_list)
"""
Find equivalent non-H sites then store a list of non-H atom without equivalent sites (label_list).
"""
mapper = InchiMolAtomMapper()
labelInfo = mapper._inchi_labels(BabelMolAdaptor(mol)._obmol)
# print(labelInfo,len(labelInfo))
label_list = list(labelInfo[0])
for equivalent_group in labelInfo[1]:
    for atomLabel in range(0, len(equivalent_group)):
        if atomLabel > 0:
            label_list.remove(labelInfo[0][equivalent_group[atomLabel] - 1])
print("Atomic numbers of non-H non-equivalent sites are: ", label_list)
"""
Find a list of sites that can be substituted using functional groups. These sites are hydrogen that
directly bound to either C or N atoms.
"""
substitute_sitelist = []
for i in label_list:
    if mol[i - 1].species_string in ["C", "N"]:
        """
Example #30
0
    def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
        """
        Assemble the task document for the Q-Chem calculation(s) in a directory.

        Args:
            dir_name: directory holding the calculation files.
            qcinput_files: mapping of task name -> input filename. When it has
                more entries than ``qcoutput_files``, its "orig" entry is popped
                (the mapping is mutated) and parsed into ``d["orig"]``.
            qcoutput_files: mapping of task name -> output filename.
            multirun: if truthy, parsing is delegated to
                ``process_qchem_multirun``; otherwise every output file is
                parsed individually with ``process_qchemrun``.

        Returns:
            dict: the assembled task document.

        Raises:
            Exception: any error raised while building the document is logged
                together with its traceback and then re-raised unchanged.
        """
        try:
            fullpath = os.path.abspath(dir_name)
            d = jsanitize(self.additional_fields, strict=True)
            d["schema"] = {
                "code": "atomate",
                "version": QChemDrone.__version__
            }
            d["dir_name"] = fullpath

            # If a saved "orig" input file is present, parse it in case the error
            # handler made changes to the initial input molecule or rem params,
            # which we might want to filter for later
            if len(qcinput_files) > len(qcoutput_files):
                orig_input = QCInput.from_file(
                    os.path.join(dir_name, qcinput_files.pop("orig")))
                d["orig"] = {}
                d["orig"]["molecule"] = orig_input.molecule.as_dict()
                d["orig"]["molecule"]["charge"] = int(
                    d["orig"]["molecule"]["charge"])
                d["orig"]["rem"] = orig_input.rem
                d["orig"]["opt"] = orig_input.opt
                d["orig"]["pcm"] = orig_input.pcm
                d["orig"]["solvent"] = orig_input.solvent
                d["orig"]["smx"] = orig_input.smx

            if multirun:
                d["calcs_reversed"] = self.process_qchem_multirun(
                    dir_name, qcinput_files, qcoutput_files)
            else:
                d["calcs_reversed"] = [
                    self.process_qchemrun(dir_name, taskname,
                                          qcinput_files.get(taskname),
                                          output_filename)
                    for taskname, output_filename in qcoutput_files.items()
                ]

            # reverse the calculations data order so newest calc is first
            d["calcs_reversed"].reverse()

            d_calc_init = d["calcs_reversed"][-1]
            d_calc_final = d["calcs_reversed"][0]

            d["input"] = {
                "initial_molecule": d_calc_init["initial_molecule"],
                "job_type": d_calc_init["input"]["rem"]["job_type"]
            }
            d["output"] = {
                "initial_molecule": d_calc_final["initial_molecule"],
                "job_type": d_calc_final["input"]["rem"]["job_type"]
            }

            # Both the short and the long spelling of each job type are accepted.
            input_job_type = d["input"]["job_type"]
            output_job_type = d["output"]["job_type"]

            if output_job_type in ("opt", "optimization"):
                if "molecule_from_optimized_geometry" in d_calc_final:
                    d["output"]["optimized_molecule"] = d_calc_final[
                        "molecule_from_optimized_geometry"]
                    d["output"]["final_energy"] = d_calc_final["final_energy"]
                else:
                    # No optimized geometry was parsed from the final calculation.
                    d["output"]["final_energy"] = "unstable"
                if d_calc_final["opt_constraint"]:
                    d["output"]["constraint"] = [
                        d_calc_final["opt_constraint"][0],
                        float(d_calc_final["opt_constraint"][6])
                    ]
            if output_job_type in ("freq", "frequency"):
                d["output"]["frequencies"] = d_calc_final["frequencies"]
                d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
                d["output"]["entropy"] = d_calc_final["total_entropy"]
                if input_job_type in ("opt", "optimization"):
                    # Optimization followed by a frequency job: the frequency
                    # job's starting geometry is stored as the optimized
                    # molecule, and the energy is taken from the calculation
                    # just before the final one.
                    d["output"]["optimized_molecule"] = d_calc_final[
                        "initial_molecule"]
                    d["output"]["final_energy"] = d["calcs_reversed"][1][
                        "final_energy"]

            if output_job_type == "sp":
                d["output"]["final_energy"] = d_calc_final["final_energy"]

            if d_calc_final["completion"]:
                # Sum the timings of every calculation that reported them.
                total_cputime = 0.0
                total_walltime = 0.0
                for calc in d["calcs_reversed"]:
                    if calc["walltime"] is not None:
                        total_walltime += calc["walltime"]
                    if calc["cputime"] is not None:
                        total_cputime += calc["cputime"]
                d["walltime"] = total_walltime
                d["cputime"] = total_cputime
            else:
                d["walltime"] = None
                d["cputime"] = None

            comp = d["output"]["initial_molecule"].composition
            d["formula_pretty"] = comp.reduced_formula
            d["formula_anonymous"] = comp.anonymized_formula
            d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
            if d_calc_final["point_group"] is not None:
                d["pointgroup"] = d_calc_final["point_group"]
            else:
                # Fall back to a symmetry analysis when no point group was parsed.
                try:
                    d["pointgroup"] = PointGroupAnalyzer(
                        d["output"]["initial_molecule"]).sch_symbol
                except ValueError:
                    d["pointgroup"] = "PGA_error"

            bb = BabelMolAdaptor(d["output"]["initial_molecule"])
            pbmol = bb.pybel_mol
            smiles = pbmol.write(str("smi")).split()[0]
            d["smiles"] = smiles

            d["state"] = "successful" if d_calc_final[
                "completion"] else "unsuccessful"
            if "special_run_type" in d:
                if d["special_run_type"] == "frequency_flattener":
                    d["num_frequencies_flattened"] = int((len(qcinput_files) /
                                                          2) - 1)
                    if d["state"] == "successful":
                        if d_calc_final["frequencies"][
                                0] < 0:  # If a negative frequency remains,
                            d["state"] = "unsuccessful"  # then the flattening was unsuccessful
            d["last_updated"] = datetime.datetime.utcnow()
            return d

        except Exception:
            # Log the failing directory and the traceback once, then let the
            # caller decide how to handle the failure.
            logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                         traceback.format_exc())
            raise
Example #31
0
 def add_hydrogen(self):
     """
     Check that BabelMolAdaptor.add_hydrogen() makes implicit hydrogens explicit.

     NOTE(review): the method name has no ``test_`` prefix, so default unittest
     discovery will not run it -- confirm whether that is intentional.
     """
     mol_0d = pb.readstring("smi", "CCCC").OBMol
     # Hydrogens are implicit in the SMILES string, so only the four carbon
     # atoms of butane are explicit before hydrogens are added.
     self.assertEqual(len(pb.Molecule(mol_0d).atoms), 4)
     adaptor = BabelMolAdaptor(mol_0d)
     adaptor.add_hydrogen()
     # Butane is C4H10: 4 carbons + 10 hydrogens = 14 sites.
     self.assertEqual(len(adaptor.pymatgen_mol.sites), 14)
Example #32
0
def create_rdkit_mol_from_mol_graph(mol_graph,
                                    name=None,
                                    force_sanitize=False,
                                    metals=None):
    """
    Create a rdkit molecule from molecule graph, with bond type perceived by babel.
    Done in the below steps:

    1. create a babel mol without metal atoms.
    2. perceive bond order (conducted by BabelMolAdaptor)
    3. adjust formal charge of metal atoms so as not to violate valence rule
    4. create rdkit mol based on species, coords, bonds, and formal charge

    Args:
        mol_graph (pymatgen MoleculeGraph): molecule graph
        name (str): name of the molecule
        force_sanitize (bool): whether to force sanitization of the rdkit mol
        metals (dict): with metal atom (str) as key and the number of valence
            electrons as value. Defaults to ``{"Li": 1, "Mg": 2}`` when None.

    Returns:
        m: rdkit Chem.Mol
        bond_types (dict): bond types assigned to the created rdkit mol, keyed
            by the atom-index pair of each bond

    Raises:
        RuntimeError: if babel reports a bond order other than 0, 1, 2, 3 or 5,
            or if a bond connects two metal atoms.
    """
    # Build the default per call: a dict literal in the signature would be one
    # shared object across all calls and is handed on to other functions.
    if metals is None:
        metals = {"Li": 1, "Mg": 2}

    pymatgen_mol = mol_graph.molecule
    species = [str(s) for s in pymatgen_mol.species]
    coords = pymatgen_mol.cart_coords
    bonds = [
        tuple(sorted([i, j])) for i, j, _attr in mol_graph.graph.edges.data()
    ]

    # create babel mol without metals
    pmg_mol_no_metals = remove_metals(pymatgen_mol)
    adaptor = BabelMolAdaptor(pmg_mol_no_metals)
    ob_mol = adaptor.openbabel_mol

    # get babel bond order of mol without metals
    ob_bond_order = {}
    for bd in ob.OBMolBondIter(ob_mol):
        k = tuple(sorted([bd.GetBeginAtomIdx(), bd.GetEndAtomIdx()]))
        ob_bond_order[k] = bd.GetBondOrder()

    # babel bond order -> rdkit bond type
    order_to_type = {
        0: BondType.UNSPECIFIED,
        1: BondType.SINGLE,
        2: BondType.DOUBLE,
        3: BondType.TRIPLE,
        5: BondType.AROMATIC,
    }

    # create bond type
    atom_idx_mapping = pymatgen_2_babel_atom_idx_map(pymatgen_mol, ob_mol)
    bond_types = {}

    for bd in bonds:
        try:
            ob_bond = [atom_idx_mapping[a] for a in bd]

            # atom not in ob mol: handled together with "bond unknown to babel"
            if None in ob_bond:
                raise KeyError

            # atom in ob mol
            v = ob_bond_order[tuple(sorted(ob_bond))]
            tp = order_to_type.get(v)
            if tp is None:
                raise RuntimeError(f"Got unexpected babel bond order: {v}")

        except KeyError:
            atom1_spec, atom2_spec = [species[a] for a in bd]

            if atom1_spec in metals and atom2_spec in metals:
                raise RuntimeError("Got a bond between two metal atoms")

            # bond involves one and only one metal atom (atom not in ob mol case above)
            elif atom1_spec in metals or atom2_spec in metals:
                tp = Chem.rdchem.BondType.DATIVE

                # Dative bonds have the special characteristic that they do not affect
                # the valence on the start atom, but do affect the end atom.
                # Here we adjust the atom ordering in the bond for dative bond to make
                # metal the end atom.
                if atom1_spec in metals:
                    bd = tuple(reversed(bd))

            # bond not found by babel (atom in ob mol)
            else:
                tp = Chem.rdchem.BondType.UNSPECIFIED

        bond_types[bd] = tp

    # a metal atom can form multiple dative bonds (e.g. bidentate LiEC); for such
    # cases we need to adjust their formal charge so as not to violate valence rule
    formal_charge = adjust_formal_charge(species, bonds, metals)

    m = create_rdkit_mol(species, coords, bond_types, formal_charge, name,
                         force_sanitize)

    return m, bond_types
Example #33
0
    def get_task_doc(cls, path, fw_spec=None):
        """
        Build the complete, post-processed task document for one output file.

        Args:
            path: path of the output file to parse.
            fw_spec: optional mapping; when truthy, its 'inchi' entry is
                compared with the final InChI to set 'inchi_changed'.

        Returns:
            The task document after passing through jsanitize.
        """
        logger.info("Getting task doc for file:{}".format(path))
        jobs = QcOutput(zpath(path)).data
        first_job = jobs[0]
        initial_mol = first_job["molecules"][0]
        mol = first_job["molecules"][-1]
        if first_job["jobtype"] == "freq":
            # For a leading frequency job, use a copy of the initial geometry.
            mol = Molecule.from_dict(initial_mol.as_dict())

        # Line notations of the molecule, as written by pybel.
        pbmol = BabelMolAdaptor(mol).pybel_mol
        xyz = XYZ(mol)
        smiles = pbmol.write(str("smi")).split()[0]
        can = pbmol.write(str("can")).split()[0]
        inchi_final = pbmol.write(str("inchi")).strip()
        svg = cls.modify_svg(cls.xyz2svg(xyz))

        composition = mol.composition
        mol_charge = mol.charge
        multiplicity = mol.spin_multiplicity
        sch_symbol = PointGroupAnalyzer(mol).sch_symbol

        # Index the jobs by kind; a later job of the same kind replaces an
        # earlier one.
        calcs_by_type = {}
        stationary_type = None
        has_structure_changing_job = False
        for job in jobs:
            job_type = job["jobtype"]
            if job_type == "opt":
                calcs_by_type["geom_opt"] = job
                has_structure_changing_job = True
            elif job_type == "freq":
                calcs_by_type["freq"] = job
                has_structure_changing_job = True
                if job["has_error"]:
                    stationary_type = "unknown"
                elif job['frequencies'][0]["frequency"] < -0.00:
                    # the first listed frequency is negative
                    stationary_type = "non-minimum"
                else:
                    stationary_type = "minimum"
            elif job_type == "sp":
                solvent_method = job["solvent_method"]
                if solvent_method == "NA":
                    suffix = ""
                else:
                    suffix = "_" + solvent_method
                calcs_by_type["scf" + suffix] = job
            elif job_type == "aimd":
                calcs_by_type["amid"] = job
                has_structure_changing_job = True

        abs_path = os.path.abspath(path)
        d = {
            "path": abs_path,
            "folder": os.path.basename(os.path.dirname(os.path.abspath(path))),
            "calculations": calcs_by_type,
            "molecule_initial": initial_mol.as_dict(),
            "molecule_final": mol.as_dict(),
            "pointgroup": sch_symbol,
            "pretty_formula": composition.reduced_formula,
            "reduced_cell_formula_abc": composition.alphabetical_formula,
            "formula": composition.formula,
            "charge": mol_charge,
            "spin_multiplicity": multiplicity,
            "composition": composition.as_dict(),
            "elements": list(composition.as_dict().keys()),
            "nelements": len(composition),
            "smiles": smiles,
            "can": can,
            "inchi_final": inchi_final,
            "svg": svg,
            "xyz": str(xyz),
            "names": get_nih_names(smiles)
        }

        if stationary_type:
            d['stationary_type'] = stationary_type
        if fw_spec:
            d['inchi_changed'] = bool(fw_spec['inchi'] != d['inchi_final'])

        # Only jobs flagged as structure-changing can trigger the check.
        d['structure_changed'] = (
            cls._check_structure_change(initial_mol, mol, path)
            if has_structure_changing_job else False)
        if d['structure_changed']:
            d['state'] = 'rejected'
            d['reject_reason'] = 'structural change'

        if "state" not in d:
            # Collect the errors of every failed calculation.
            for calc in calcs_by_type.values():
                if not calc['has_error']:
                    continue
                d['state'] = "error"
                d.setdefault("errors", []).extend(calc["errors"])
        if "state" not in d:
            d["state"] = "successful"

        return jsanitize(d)
Example #34
0
 def test_from_file(self):
     """Load the Ethane_e.pdb fixture through the adaptor and check its formula."""
     pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
     loaded = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
     # The molecule read from the file must report the formula "H6 C2".
     self.assertEqual(loaded.formula, "H6 C2")