Esempi in Python per get_molecule

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: moltherm.compute.utils

Metodo/funzione: get_molecule

Esempi su hotexamples.com: 9

get_molecule in Python: 9 esempi trovati. Questi sono i migliori esempi reali in Python per moltherm.compute.utils.get_molecule, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

    def check_appropriate_dirs(self, dirs):
        """
        Filter reaction directories down to those whose atom counts balance.

        For every directory in ``dirs`` (taken relative to ``self.base_dir``),
        the ``.mol`` files beginning with ``self.reactant_pre`` and those
        beginning with ``self.product_pre`` are loaded through
        ``get_molecule``. A directory is kept when the summed lengths of its
        reactant molecules equal the summed lengths of its product molecules.

        This is a deliberately crude check and could easily be refined.

        :param dirs: Iterable of subdirectory names under self.base_dir.
        :return: List of the directory names that passed, in input order.
        """

        def total_length(folder, names):
            # Sum of len() over every molecule loaded from the given files
            return sum(len(get_molecule(join(folder, name))) for name in names)

        balanced = []

        for subdir in dirs:
            folder = join(self.base_dir, subdir)
            mol_names = [
                name for name in listdir(folder)
                if isfile(join(folder, name)) and name.endswith(".mol")
            ]

            reactant_names = [
                name for name in mol_names
                if name.startswith(self.reactant_pre)
            ]
            product_names = [
                name for name in mol_names
                if name.startswith(self.product_pre)
            ]

            reactant_total = total_length(folder, reactant_names)
            product_total = total_length(folder, product_names)

            if reactant_total == product_total:
                balanced.append(subdir)

        return balanced

Esempio n. 2

Mostra file

File: test_utils.py Progetto: peiyuan-yu/Moltherm

    def test_get_molecule(self):
        """
        get_molecule on benzene.mol must keep the heavy atoms pymatgen reads,
        add the six hydrogens, be reproducible, and match the reference
        Cartesian coordinates.
        """
        mol_path = join(files_dir, "benzene.mol")

        reference = Molecule.from_file(mol_path)
        loaded = get_molecule(mol_path)

        # With hydrogens filtered out, the species must equal what pymatgen
        # reads straight from the file
        heavy_atoms = [
            sp for sp in loaded.species if str(sp).upper() != "H"
        ]
        self.assertEqual(heavy_atoms, reference.species)

        # Implicit hydrogens must have been added: C6H6 in total
        symbols = sorted(str(sp) for sp in loaded.species)
        self.assertEqual(symbols, ["C"] * 6 + ["H"] * 6)

        # Loading the same file a second time must give an equal molecule
        self.assertEqual(loaded, get_molecule(mol_path))

        # Reference geometry: six carbon rows followed by six hydrogen rows
        expected_coords = np.array([
            [-7.59858151e-01, 1.16908119e+00, -1.61105859e-03],
            [-1.39065495e+00, -7.49583582e-02, -9.63095317e-04],
            [-6.28683825e-01, -1.24326334e+00, 6.60526465e-04],
            [7.64084196e-01, -1.16752892e+00, 1.63614012e-03],
            [1.39488100e+00, 7.65106237e-02, 9.87135623e-04],
            [6.32909871e-01, 1.24481561e+00, -6.36441101e-04],
            [-1.35352141e+00, 2.07932532e+00, -2.87509442e-03],
            [-2.47578162e+00, -1.33964201e-01, -1.72330284e-03],
            [-1.12014718e+00, -2.21251339e+00, 1.16530208e-03],
            [1.35774746e+00, -2.07777305e+00, 2.90204589e-03],
            [2.48000766e+00, 1.35516465e-01, 1.74638272e-03],
            [1.12437322e+00, 2.21406566e+00, -1.14215271e-03],
        ])

        coords_match = np.allclose(loaded.cart_coords, expected_coords)
        self.assertTrue(coords_match)

Esempio n. 3

Mostra file

    def get_modified_molecule_workflow(self,
                                       directory,
                                       reactant,
                                       index,
                                       func_group,
                                       qchem_input_params,
                                       sp_params,
                                       bond_order=1,
                                       do_rct=True,
                                       new_dir=None):
        """
        Modify a reactant molecule, mimic that change in the product, and then
        create a workflow with the modified molecules.

        The reactant graph is located inside the product graph with a subgraph
        isomorphism check; the resulting atom mapping decides which product
        atom receives the functional group. The modified molecules are written
        out as new ``.mol`` files. When they are written to a directory other
        than the original one, the remaining ``.mol`` files of the reaction
        are copied there as well.

        Note: the substitution is only attempted when the reactant can be found
        within the product; otherwise a RuntimeError is raised.

        :param directory: Subdirectory where the reaction files are.
        :param reactant: File name of the reactant to be modified. It MUST be
            a reactant, and cannot be the product molecule.
        :param index: Index (in the reactant molecule) where the functional
            group is to be substituted.
        :param func_group: Either a string representing a functional group (from
            pymatgen.structure.core.FunctionalGroups), or a Molecule with a
            dummy atom X.
        :param qchem_input_params: Passed unchanged to every OptFreqSPFW.
        :param sp_params: Passed unchanged to every OptFreqSPFW.
        :param bond_order: Order of the bond between the functional group and
            the base molecule. Default 1, for single bond.
        :param do_rct: If True (default), calculate both modified reactant and
            modified product; if False, only calculate for the product.
        :param new_dir: Name (relative to self.base_dir) for a directory to
            store the modified molecules. Default is None, in which case the
            files are written next to the originals in ``directory``.
        :return: Workflow holding the Firework for the modified product and,
            when do_rct is True, the one for the modified reactant.
        :raises RuntimeError: If the reactant graph is not a subgraph of the
            product graph.
        :raises IndexError: If ``reactant`` or a product file is not present
            in ``directory``.
        """

        base_path = join(self.base_dir, directory)
        mol_files = [
            f for f in listdir(base_path)
            if isfile(join(base_path, f)) and f.endswith(".mol")
        ]
        # For this workflow, assume a single product
        rct_file = [f for f in mol_files if f == reactant][0]
        pro_file = [f for f in mol_files if f.startswith(self.product_pre)][0]

        # Set up - strategy to extract bond orders
        # Node match for isomorphism check
        strat = OpenBabelNN()
        nm = iso.categorical_node_match("specie", "C")

        # Set up molecule graphs, including node attributes
        rct_mg = MoleculeGraph.with_local_env_strategy(
            get_molecule(join(base_path, rct_file)),
            strat,
            reorder=False,
            extend_structure=False)
        rct_mg.set_node_attributes()
        rct_graph = rct_mg.graph.to_undirected()

        pro_mg = MoleculeGraph.with_local_env_strategy(
            get_molecule(join(base_path, pro_file)),
            strat,
            reorder=False,
            extend_structure=False)
        pro_mg.set_node_attributes()
        pro_graph = pro_mg.graph.to_undirected()

        # To determine the subgraph of pro_mg that is derived from the reactant
        matcher = iso.GraphMatcher(pro_graph, rct_graph, node_match=nm)

        if not matcher.subgraph_is_isomorphic():
            raise RuntimeError("Cannot find reactant molecule within product "
                               "molecule.")

        # Keep the last isomorphism produced by the iterator
        for mm in matcher.subgraph_isomorphisms_iter():
            mapping = mm

        # Reverse mapping, so that it is keyed by reactant atom index
        mapping = {value: key for key, value in mapping.items()}

        if new_dir is None:
            new_path = base_path
        else:
            new_path = join(self.base_dir, new_dir)
            try:
                os.mkdir(new_path)
            except FileExistsError:
                print("New directory {} already exists in {}".format(
                    new_dir, self.base_dir))

        rct_mg.substitute_group(index,
                                func_group,
                                OpenBabelNN,
                                bond_order=bond_order,
                                extend_structure=False)
        pro_mg.substitute_group(mapping[index],
                                func_group,
                                OpenBabelNN,
                                bond_order=bond_order,
                                extend_structure=False)

        rct_name = rct_file.replace(".mol", "{}{}".format(func_group, index))
        pro_name = pro_file.replace(".mol", "{}{}".format(func_group, index))

        rct_mg.molecule.to(fmt="mol",
                           filename=join(new_path, rct_name + ".mol"))
        pro_mg.molecule.to(fmt="mol",
                           filename=join(new_path, pro_name + ".mol"))

        # Bring the untouched molecules along, but only when the target is a
        # different directory: shutil.copyfile raises SameFileError when the
        # source and destination are the same file.
        if not os.path.samefile(base_path, new_path):
            for mol_file in mol_files:
                if mol_file != pro_file and mol_file != rct_file:
                    shutil.copyfile(join(base_path, mol_file),
                                    join(new_path, mol_file))

        fws = [
            OptFreqSPFW(molecule=pro_mg.molecule,
                        name="Modification: {}/{}".format(new_path, pro_name),
                        qchem_cmd="qchem -slurm",
                        input_file=join(new_path, pro_name + ".in"),
                        output_file=join(new_path, pro_name + ".out"),
                        qclog_file=join(new_path, pro_name + ".qclog"),
                        max_cores=32,
                        qchem_input_params=qchem_input_params,
                        sp_params=sp_params,
                        db_file=self.db_file)
        ]

        if do_rct:
            fws.append(
                OptFreqSPFW(molecule=rct_mg.molecule,
                            name="Modification: {}/{}".format(
                                new_path, rct_name),
                            qchem_cmd="qchem -slurm",
                            input_file=join(new_path, rct_name + ".in"),
                            output_file=join(new_path, rct_name + ".out"),
                            qclog_file=join(new_path, rct_name + ".qclog"),
                            max_cores=32,
                            qchem_input_params=qchem_input_params,
                            sp_params=sp_params,
                            db_file=self.db_file))

        return Workflow(fws)

Esempio n. 4

Mostra file

    def get_molecule_workflow(self,
                              path,
                              mol_id,
                              name_pre="molecule_opt_freq",
                              qchem_cmd="qchem -slurm",
                              max_cores=32,
                              qchem_input_params=None,
                              modify_mol=True,
                              max_iterations=3,
                              max_perturb_scale=0.3):
        """
        Generates a Fireworks Workflow to optimize a molecular geometry and
        perform a vibrational analysis (frequency calculation) in Q-Chem.

        Molecule files are looked up in ``self.base_dir/path/mol_id``; only
        ``.mol`` files whose name starts with ``mol_id`` are considered. With
        exactly one such file, a geometry stored under that mol_id in
        ``self.molecules`` takes precedence over the file. With several files,
        one Firework is generated per file, each in its own subdirectory.

        :param path: Specified (sub)path, relative to self.base_dir, that
        contains the ``mol_id`` directory. Required.
        :param mol_id: str representing the unique molecule identifier
        :param name_pre: str indicating the prefix which should be used for all
        Firework names
        :param qchem_cmd: str indicating how the Q-Chem code should be called.
        Default is "qchem -slurm", for a SLURM-based system.
        :param max_cores: int specifying how many cores the workflow should be
        split over. Default is 32.
        :param qchem_input_params: dict listing all parameters differing from
        default values.
        :param modify_mol: If True (default), read files with the utility
        get_molecule; if False, read them with Molecule.from_file.
        :param max_iterations (int): Number of perturbation -> optimization -> frequency
        iterations to perform. Defaults to 3.
        :param max_perturb_scale (float): The maximum scaled perturbation that can be
        applied to the molecule. Defaults to 0.3.
        :return: Workflow
        :raises RuntimeError: If no matching ``.mol`` file is found.
        """

        base_path = join(self.base_dir, path, mol_id)

        mol_files = [
            f for f in listdir(base_path) if isfile(join(base_path, f))
            and f.startswith(mol_id) and f.endswith(".mol")
        ]

        if not mol_files:
            raise RuntimeError("No valid files found.")

        def load_molecule(mol_file):
            # Honour modify_mol identically in both branches below
            if modify_mol:
                return get_molecule(join(base_path, mol_file))
            return Molecule.from_file(join(base_path, mol_file))

        def build_firework(mol, directory):
            # Single place where caller-supplied settings are forwarded, so
            # no branch can silently drop one of them
            return FrequencyFlatteningOptimizeFW(
                molecule=mol,
                name=name_pre + "_{}".format(mol_id),
                qchem_cmd=qchem_cmd,
                qchem_input_params=qchem_input_params,
                multimode="openmp",
                max_cores=max_cores,
                directory=directory,
                max_iterations=max_iterations,
                max_molecule_perturb_scale=max_perturb_scale,
                db_file=self.db_file)

        if len(mol_files) == 1:
            entry = self.molecules.find_one({"mol_id": mol_id})

            if entry is None:
                mol = load_molecule(mol_files[0])
            else:
                # Prefer the optimized geometry, fall back to the initial one
                output = entry["output"]
                geometry = output.get('optimized_molecule',
                                      output.get('initial_molecule'))
                mol = Molecule.from_dict(geometry)

            return Workflow([build_firework(mol, base_path)])

        print("Multiple valid molecule files found.")
        print("Generating workflows for all valid files found.")

        fws = []

        for i, mol_file in enumerate(mol_files):
            mol = load_molecule(mol_file)

            filename = mol_file.split(".")[0]
            dir_name = join(base_path, "{}_{}".format(filename, i))
            try:
                mkdir(dir_name)
            except FileExistsError:
                print("Subdirectory {} already exists".format(dir_name))

            # Run in the subdirectory created above so the calculations for
            # different files do not share one working directory
            fws.append(build_firework(mol, dir_name))

        return Workflow(fws)

Esempio n. 5

Mostra file

    def get_reaction_set_workflow(self,
                                  name_pre="opt_freq_sp",
                                  max_cores=32,
                                  qchem_input_params=None,
                                  sp_params=None):
        """Generates a Fireworks Workflow to find the structures and energies of
        the reactants and products of every reaction under self.base_dir.

        Each subdirectory of self.base_dir is treated as one reaction. Only
        directories accepted by self.check_appropriate_dirs are used, and a
        molecule id is submitted at most once: ids already present in self.db
        (when it is set) or already seen in this call are skipped.

        Note: as written now, this function will only work if self.subdirs is
        True; that is, only if each reaction is in a separate subdirectory.
        Later additions could allow for some other means of specifying the
        separate reactions within a single directory.

        :param name_pre: str indicating the prefix which should be used for all
        Firework names
        :param max_cores: int specifying number of processes/threads that can
        be used for this workflow.
        :param qchem_input_params: dict
        :param sp_params: For OptFreqSPFW, single-point calculations can be
        treated differently from Opt and Freq. In this case, another dict
        for sp must be used.

        :return: Workflow
        :raises RuntimeError: If self.subdirs is false.
        """

        if not self.subdirs:
            raise RuntimeError("Cannot run get_reaction_set_workflow(); "
                               "Need reactions components to be isolated in "
                               "different subdirectories.")

        fws = []

        dirs = [
            d for d in listdir(self.base_dir) if isdir(join(self.base_dir, d))
        ]

        # Only set up a workflow if it is worthwhile (the reaction actually
        # proceeds as written, and all atoms add up)
        appropriate_dirs = self.check_appropriate_dirs(dirs)

        # Keep track of which molecules have already been run as jobs before;
        # a set keeps the membership test cheap
        if self.db is not None:
            molecules_registered = {
                extract_id(fw["task_label"])
                for fw in self.db.collection.find()
            }
        else:
            molecules_registered = set()

        suffix = ".mol"

        for d in appropriate_dirs:
            path = join(self.base_dir, d)
            files = [
                f for f in listdir(path)
                if isfile(join(path, f)) and f.endswith(suffix)
            ]

            # Reactants first, then products; both are handled identically
            for prefix in (self.reactant_pre, self.product_pre):
                matching = [f for f in files if f.startswith(prefix)]

                for i, mol_file in enumerate(matching):
                    # Slice the extension off: str.rstrip(".mol") would strip
                    # any trailing '.', 'm', 'o' or 'l' and mangle the id
                    mol_id = mol_file[:-len(suffix)].split("_")[-1]

                    if mol_id in molecules_registered:
                        continue
                    molecules_registered.add(mol_id)

                    mol = get_molecule(join(path, mol_file))

                    # Input/output/log files share one indexed stem
                    stem = join(path, prefix + str(i))

                    fws.append(
                        OptFreqSPFW(molecule=mol,
                                    name="{}: {}/{}".format(
                                        name_pre, d, mol_file),
                                    qchem_cmd="qchem -slurm",
                                    input_file=stem + ".in",
                                    output_file=stem + ".out",
                                    qclog_file=stem + ".qclog",
                                    max_cores=max_cores,
                                    qchem_input_params=qchem_input_params,
                                    sp_params=sp_params,
                                    db_file=self.db_file))

        return Workflow(fws)

Esempio n. 6

Mostra file

    def get_single_reaction_workflow(self,
                                     name_pre="opt_freq_sp",
                                     path=None,
                                     filenames=None,
                                     max_cores=32,
                                     qchem_input_params=None,
                                     sp_params=None):
        """
        Generates a Fireworks Workflow to find the structures and energies of
        the reactants and products of a single reaction.

        One OptFreqSPFW is created per reactant file and per product file;
        reactants are added to the workflow before products.

        :param name_pre: str indicating the prefix which should be used for all
        Firework names
        :param path: Specified (sub)path in which to run the reaction. By
        default, this is None, and the Fireworks will run in self.base_dir.
        It is only used for locating files when self.subdirs is true.
        :param filenames: Specified files within the path (if self.base_dir or
        a subdirectory) that should be considered a part of this reaction. If
        None, assume all files in the directory are to be involved.
        :param max_cores: int specifying number of processes/threads that can
        be used for this workflow.
        :param qchem_input_params: dict
        :param sp_params: For OptFreqSPFW, single-point calculations can be
        treated differently from Opt and Freq. In this case, another dict
        for sp must be used.
        :return: Workflow
        """

        fws = []

        # path=None is the documented default and means "use self.base_dir";
        # joining None onto the base directory would raise a TypeError
        if self.subdirs and path is not None:
            base_path = join(self.base_dir, path)
        else:
            base_path = self.base_dir

        if filenames:
            rcts = [
                f for f in filenames
                if f.startswith(self.reactant_pre) and f.endswith(".mol")
            ]
            pros = [
                f for f in filenames
                if f.startswith(self.product_pre) and f.endswith(".mol")
            ]
            print(rcts)
            print(pros)
        else:
            # Assume that every file in the directory is part of the reaction
            files = [
                f for f in listdir(base_path)
                if isfile(join(base_path, f)) and f.endswith(".mol")
            ]
            rcts = [f for f in files if f.startswith(self.reactant_pre)]
            pros = [f for f in files if f.startswith(self.product_pre)]

        # Reactants and products go through exactly the same steps
        for prefix, mol_files in ((self.reactant_pre, rcts),
                                  (self.product_pre, pros)):
            for i, mol_file in enumerate(mol_files):
                mol = get_molecule(join(base_path, mol_file))

                # Input/output/log files share one indexed stem
                stem = join(base_path, prefix + str(i))

                fws.append(
                    OptFreqSPFW(molecule=mol,
                                name="{}: {}/{}".format(
                                    name_pre, path, mol_file),
                                qchem_cmd="qchem -slurm",
                                input_file=stem + ".in",
                                output_file=stem + ".out",
                                qclog_file=stem + ".qclog",
                                max_cores=max_cores,
                                qchem_input_params=qchem_input_params,
                                sp_params=sp_params,
                                db_file=self.db_file))

        return Workflow(fws)

Esempio n. 7

Mostra file

    def get_single_molecule_workflow(self,
                                     mol_id,
                                     name_pre="opt_freq_sp",
                                     path=None,
                                     max_cores=32,
                                     max_iterations=1,
                                     max_perturb_scale=0.3,
                                     qchem_input_params=None,
                                     sp_params=None):
        """
        Generates a Fireworks Workflow to find the structure and energy of a
        single molecule.

        The molecule is read from the first ``.mol`` file in the working
        directory whose name starts with ``mol_id``, and a single OptFreqSPFW
        is created for it.

        :param mol_id: ID string for molecule to be analyzed
        :param name_pre: str indicating the prefix which should be used for all
        Firework names
        :param path: str indicating subdirectory where calculation should take
        place. Only used when self.subdirs is true; with the default None the
        calculation takes place in self.base_dir.
        :param max_cores: int specifying number of processes/threads that can
        be used for this workflow.
        :param max_iterations: For opt-freq-sp workflow, multiple iterations
        can be performed in case of negative frequencies. By default, no such
        "frequency flattening" is allowed (max_iterations=1); in general, 3
        is recommended.
        :param max_perturb_scale (float): The maximum scaled perturbation that can be
        applied to the molecule. Defaults to 0.3.
        :param qchem_input_params: dict
        :param sp_params: For OptFreqSPFW, single-point calculations can be
        treated differently from Opt and Freq. In this case, another dict
        for sp must be used.
        :return: Workflow
        :raises IndexError: If no ``.mol`` file starting with mol_id exists in
        the working directory.
        """

        # path=None means "work in self.base_dir"; joining None onto the base
        # directory would raise a TypeError
        if self.subdirs and path is not None:
            base_path = join(self.base_dir, path)
        else:
            base_path = self.base_dir

        candidates = [
            f for f in listdir(base_path) if isfile(join(base_path, f))
            and f.startswith(mol_id) and f.endswith(".mol")
        ]

        if not candidates:
            # Same exception type as indexing the empty list, but with a
            # message that says what is actually missing
            raise IndexError(
                "No .mol file starting with {} found in {}".format(
                    mol_id, base_path))

        mol_file = candidates[0]

        mol = get_molecule(join(base_path, mol_file))

        infile = join(base_path, mol_id + ".qin")
        outfile = join(base_path, mol_id + ".qout")
        qclogfile = join(base_path, mol_id + ".qclog")

        fw = OptFreqSPFW(molecule=mol,
                         name="{}: {}/{}".format(name_pre, path, mol_file),
                         qchem_cmd="qchem -slurm",
                         input_file=infile,
                         output_file=outfile,
                         qclog_file=qclogfile,
                         max_cores=max_cores,
                         max_iterations=max_iterations,
                         max_molecule_perturb_scale=max_perturb_scale,
                         qchem_input_params=qchem_input_params,
                         sp_params=sp_params,
                         db_file=self.db_file)

        return Workflow([fw])

Esempio n. 8

Mostra file

    def get_reaction_workflow(self,
                              rxn_id,
                              mol_dir=None,
                              name_pre="reaction_opt_freq",
                              qchem_cmd="qchem -slurm",
                              max_cores=32,
                              qchem_input_params=None,
                              max_iterations=3,
                              max_perturb_scale=0.3):
        """
        Build a Fireworks Workflow that runs a geometry optimization and a
        vibrational analysis for each molecule taking part in a reaction.

        The reaction is looked up by ``rxn_id`` in ``self.reactions``. Each of
        its product and reactant ids gets a directory (created if missing) and
        one FrequencyFlatteningOptimizeFW. The starting geometry comes from
        ``self.molecules`` when an entry exists, otherwise from a ``.mol``
        file in the molecule's directory.

        :param rxn_id: str representing unique reaction identifier.
        :param mol_dir: str indicating a subdirectory (from self.base_dir)
        where molecule calculations should be stored. Default is None,
        indicating that all calculations should be done within self.base_dir.
        :param name_pre: str indicating the prefix which should be used for all
        Firework names
        :param qchem_cmd: str indicating how the Q-Chem code should be called.
        Default is "qchem -slurm", for a SLURM-based system.
        :param max_cores: int specifying how many cores the workflow should be
        split over. Default is 32.
        :param qchem_input_params: dict listing all parameters differing from
        default values.
        :param max_iterations (int): Number of perturbation -> optimization -> frequency
        iterations to perform. Defaults to 3.
        :param max_perturb_scale (float): The maximum scaled perturbation that can be
        applied to the molecule. Defaults to 0.3.
        :return: Workflow
        """

        base_path = (self.base_dir if mol_dir is None
                     else join(self.base_dir, mol_dir))

        # Directories already present, ignoring anything mentioning "atomate"
        existing_dirs = [
            name for name in listdir(base_path)
            if isdir(join(base_path, name)) and "atomate" not in name
        ]

        rxn = self.reactions.find_one({"rxn_id": rxn_id})

        if rxn is None:
            raise RuntimeError(
                "No reaction with id {} found in database.".format(rxn_id))

        fireworks = []

        # Products are processed before reactants
        for mol_id in map(str, rxn["pro_ids"] + rxn["rct_ids"]):
            mol_path = join(base_path, mol_id)
            if mol_id not in existing_dirs:
                os.mkdir(mol_path)

            # NOTE(review): this changes the process working directory and
            # never restores it; with a relative self.base_dir the paths of
            # later iterations would resolve differently - confirm intent.
            os.chdir(mol_path)

            # Search for molecule in previous calculations
            stored = self.molecules.find_one({"mol_id": mol_id})

            if stored is not None:
                # Prefer the optimized geometry, fall back to the initial one
                output = stored["output"]
                geometry = output.get('optimized_molecule',
                                      output.get('initial_molecule'))
                mol = Molecule.from_dict(geometry)
            else:
                candidates = [
                    name for name in listdir(mol_path)
                    if isfile(join(mol_path, name)) and name.endswith(".mol")
                ]

                if not candidates:
                    raise RuntimeError("Molecule not found in database or file"
                                       " system.")
                if len(candidates) > 1:
                    print("More than one valid *.mol file available.")
                    print("Selecting one for analysis.")

                mol = get_molecule(join(mol_path, candidates[0]))

            fireworks.append(
                FrequencyFlatteningOptimizeFW(
                    molecule=mol,
                    name=name_pre + "_{}".format(mol_id),
                    qchem_cmd=qchem_cmd,
                    qchem_input_params=qchem_input_params,
                    multimode="openmp",
                    max_cores=max_cores,
                    directory=mol_path,
                    max_iterations=max_iterations,
                    max_molecule_perturb_scale=max_perturb_scale,
                    db_file=self.db_file))

        return Workflow(fireworks)

Esempio n. 9

Mostra file

    def copy_outputs_across_directories(self):
        """
        Copy output files between subdirectories to ensure that all reaction
        directories that need outputs of a given molecule will have them.

        For every ``.mol`` file in every subdirectory of self.base_dir
        (directories whose name starts with "block" are skipped), the local
        ``.out`` files are checked for one whose initial molecule has the same
        species. If none is found, the other directories are searched for
        ``.mol`` files containing the same molecule id, and their matching
        outputs are copied over with ``_copy`` appended to the file name. The
        search for a molecule stops at the first directory that yields a copy.

        Note: This function should not be used unless necessary. It was written
        because for each directory, only a single database entry was being made
        (because db entries were being overwritten by default).

        :return: None. The directory count and the number of copied files are
            printed.
        """

        files_copied = 0

        dirs = [
            d for d in listdir(self.base_dir)
            if isdir(join(self.base_dir, d)) and not d.startswith("block")
        ]
        print("Number of directories: {}".format(len(dirs)))

        for start_d in dirs:
            start_p = join(self.base_dir, start_d)
            mol_files = [
                f for f in listdir(start_p)
                if isfile(join(start_p, f)) and f.endswith(".mol")
            ]
            out_files = [
                f for f in listdir(start_p)
                if isfile(join(start_p, f)) and ".out" in f
            ]

            for mf in mol_files:
                mol_id = extract_id(mf)

                mol_obj = get_molecule(join(start_p, mf))
                target_species = sorted(mol_obj.species)

                # If there is already output, do not copy any files
                already_covered = any(
                    sorted(
                        QCOutput(join(start_p, out)).data["initial_molecule"]
                        .species) == target_species
                    for out in out_files)

                if already_covered:
                    continue

                for other_d in dirs:
                    if other_d == start_d:
                        continue

                    other_p = join(self.base_dir, other_d)
                    # Check if this id is present
                    other_mol_files = [
                        f for f in listdir(other_p) if isfile(join(other_p, f))
                        and f.endswith(".mol") and mol_id in f
                    ]
                    other_out_files = [
                        f for f in listdir(other_p)
                        if isfile(join(other_p, f)) and ".out" in f
                    ]

                    # Each matching .mol file replaces the previous selection,
                    # so the last one decides what is copied
                    to_copy = []
                    for other_mol in other_mol_files:
                        if other_mol.startswith(self.product_pre):
                            to_copy = [
                                f for f in other_out_files
                                if f.startswith(self.product_pre)
                            ]
                        elif other_mol.startswith(self.reactant_pre):
                            # NOTE(review): species are compared unsorted
                            # here but sorted in the local check above -
                            # confirm whether that difference is intended.
                            to_copy = [
                                f for f in other_out_files
                                if f.startswith(self.reactant_pre)
                                and QCOutput(join(other_p, f)).data[
                                    "initial_molecule"].species
                                == mol_obj.species
                            ]
                        else:
                            to_copy = []

                    for out_name in to_copy:
                        shutil.copyfile(join(other_p, out_name),
                                        join(start_p, out_name + "_copy"))
                        files_copied += 1

                    # Stop only when THIS molecule received files. Testing the
                    # running total instead would end every later molecule's
                    # search after a single directory.
                    if to_copy:
                        break

        print("Number of files copied: {}".format(files_copied))