Example #1
0
def orca_results(spline_NEB, step_to_use, i, state):
    """
    Read the Orca engrad output for one image of a spline_NEB run, copy the
    forces that were read onto the atoms in *state*, and hand back the
    energy together with the atoms that were read.

    **Parameters**

        spline_NEB: :class:`spline_NEB`
            The spline_NEB container; only its *name* attribute is used, to
            build the name of the Orca job.
        step_to_use: *int*
            The iteration of the spline_NEB sequence whose output is read.
        i: *int*
            The index of the image on the frame whose output is read.
        state: *list,* :class:`structures.Atom`
            The atoms of image *i*.  Their fx, fy and fz attributes are
            overwritten in place.

    **Returns**

        new_energy: *float*
            The energy of the system in Hartree (Ha).
        new_atoms: *list,* :class:`structures.Atom`
            The atoms as read from Orca, with forces requested in Hartree
            per Angstrom (Ha/Ang).
    """
    # Orca jobs are named <simulation name>-<step>-<image index>
    job_name = '%s-%d-%d' % (spline_NEB.name, step_to_use, i)
    new_atoms, new_energy = orca.engrad_read(job_name,
                                             pos='Ang',
                                             force='Ha/Ang')

    # Pair atoms positionally; zip stops at the shorter of the two lists
    for target, source in zip(state, new_atoms):
        force = (source.fx, source.fy, source.fz)
        target.fx, target.fy, target.fz = force

    return new_energy, new_atoms
Example #2
0
def _read_orca(name):
    """
    Read the engrad output of an Orca single point calculation and return
    its energy along with the atoms (positions and forces) that were read.

    **Parameters**

        name: *str*
            The name of the Orca simulation to read.

    **Returns**

        new_energy: *float*
            The energy of the system in Hartree (Ha).
        new_atoms: *list,* :class:`structures.Atom`
            The atoms as read from Orca, with forces requested in Hartree
            per Angstrom (Ha/Ang).
    """
    # engrad_read hands back (atoms, energy); callers expect (energy, atoms)
    atoms_with_forces, energy = orca.engrad_read(name,
                                                 pos='Ang',
                                                 force='Ha/Ang')
    return energy, atoms_with_forces
Example #3
0
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  Every usable training subset is added to a single
    system, each one offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is
    the entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    Besides the pickle file, this also writes the accepted geometries through
    files.write_xyz, creates the folder *run_name* if needed, writes the
    MCSMRFF system and training data inside it, and copies the pickle file
    to run_name/run_name.pickle.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.  A single
            trailing "/" is ignored.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.  A trailing
            ".pickle" extension is ignored.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is likely
            never to sample them.  It is compared against the energy in
            kcal/mol relative to the lowest energy subset of the same
            composition.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist
            in the default OPLSAA parameter file.  Defaults to an empty
            dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            The accepted molecules, keyed by composition (the sorted element
            symbols joined by spaces) and sorted by energy within each key.

    **Raises**

        Exception
            If the pickle file already exists, or if an atom read from a cml
            file is more than 1e-4 away from the matching atom in the Orca
            output.
    """
    # Use a fresh dict per call rather than one shared mutable default
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        # Strip only the trailing extension; splitting on ".pickle" would
        # also cut the name at any earlier occurrence of that text
        pickle_file_name = pickle_file_name[:-len(".pickle")]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Convert force from Ha/Bohr to kcal/mol-Ang
    # NOTE(review): the energy unit used here is "kcal" while the energy
    # conversion further down uses "kcal/mol" - confirm that
    # units.convert_energy treats the two the same way.
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0],
                               name=pickle_file_name)
    systems_by_composition = {}

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # Every file of a subset lives at <folder>/<name>/<name>.<extension>
        subset_path = "%s/%s/%s" % (training_sets_folder, name, name)

        # We'll read in any training subset that succeeded and print a warning
        # on those that failed
        try:
            result = orca.read(subset_path + ".out")
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            # presumably engrad_read reports forces in Ha/Bohr when no force
            # unit is requested - verify against orca.engrad_read
            forces = orca.engrad_read(subset_path + ".orca.engrad",
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(subset_path + ".cml",
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule with the others made of the same elements
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        subsets = systems_by_composition[composition]
        # Sort so that the lowest energy training subset is first
        # in the system
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy
        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Going from the highest index down keeps the remaining
    # indices of each list valid while entries are removed.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to an xyz file next to the pickle
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)
    # Generate our pickle file; the with block closes it even if dump fails
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    # Remember where we started so that the working directory is restored
    # even if writing fails or run_name is not a direct child folder
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
Example #4
0
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  Every usable training subset is added to a single
    system, each one offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is
    the entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    Besides the pickle file, this also writes the accepted geometries through
    files.write_xyz, creates the folder *run_name* if needed, writes the
    MCSMRFF system and training data inside it, and copies the pickle file
    to run_name/run_name.pickle.

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.  A single
            trailing "/" is ignored.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.  A trailing
            ".pickle" extension is ignored.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is likely
            never to sample them.  It is compared against the energy in
            kcal/mol relative to the lowest energy subset of the same
            composition.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist
            in the default OPLSAA parameter file.  Defaults to an empty
            dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            The accepted molecules, keyed by composition (the sorted element
            symbols joined by spaces) and sorted by energy within each key.

    **Raises**

        Exception
            If the pickle file already exists, or if an atom read from a cml
            file is more than 1e-4 away from the matching atom in the Orca
            output.
    """
    # Use a fresh dict per call rather than one shared mutable default
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        # Strip only the trailing extension; splitting on ".pickle" would
        # also cut the name at any earlier occurrence of that text
        pickle_file_name = pickle_file_name[:-len(".pickle")]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Convert force from Ha/Bohr to kcal/mol-Ang
    # NOTE(review): the energy unit used here is "kcal" while the energy
    # conversion further down uses "kcal/mol" - confirm that
    # units.convert_energy treats the two the same way.
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0],
                               name=pickle_file_name)
    systems_by_composition = {}

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # Every file of a subset lives at <folder>/<name>/<name>.<extension>
        subset_path = "%s/%s/%s" % (training_sets_folder, name, name)

        # We'll read in any training subset that succeeded and print a warning
        # on those that failed
        try:
            result = orca.read(subset_path + ".out")
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            # presumably engrad_read reports forces in Ha/Bohr when no force
            # unit is requested - verify against orca.engrad_read
            forces = orca.engrad_read(subset_path + ".orca.engrad",
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule(subset_path + ".cml",
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecule with the others made of the same elements
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        subsets = systems_by_composition[composition]
        # Sort so that the lowest energy training subset is first
        # in the system
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy
        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Going from the highest index down keeps the remaining
    # indices of each list valid while entries are removed.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to an xyz file next to the pickle
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)
    # Generate our pickle file; the with block closes it even if dump fails
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    # Remember where we started so that the working directory is restored
    # even if writing fails or run_name is not a direct child folder
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition