Example #1
0
def get_corrected_gbl(halide, cation, ion="Pb"):
    """
    Return the corrected UMBO for simulations using the GBL solvent.

    **Parameters**

        halide: *list, str or str*
            A list of strings specifying what halide combination was used.
            If only one string is passed, then it is assumed to be uniform.
        cation: *str*
            The cation used.
        ion: *str, optional*
            The ion passed on to fpl_utils.reduce_to_name when the name of
            the orca simulation is built.

    **Return**

        UMBO: *float*
            Corrected UMBO, averaged over the bond order entries that
            involve an oxygen.
    """
    prefix = fpl_utils.reduce_to_name(ion, halide, cation)
    result = orca.read(prefix + "_gbl_orca_1")

    # Keep only the bond order entries that involve at least one oxygen,
    # and remember the index of every oxygen seen in those entries.
    oxygen_entries = []
    oxygen_ids = []
    for entry in result.MBO:
        bonded = entry[0]
        if "O" not in [atom.element for atom in bonded]:
            continue
        oxygen_entries.append(entry)
    for entry in oxygen_entries:
        for atom in entry[0]:
            if atom.element == "O":
                oxygen_ids.append(atom.index)

    # An oxygen that shows up in exactly two entries is given a reference
    # bond order of 1; any other oxygen is given 2.  The corrected value is
    # the reference minus the bond order that was read in.
    umbos = []
    for o_id, entry in zip(oxygen_ids, oxygen_entries):
        reference = 1 if oxygen_ids.count(o_id) == 2 else 2
        umbos.append(reference - entry[1])
    return sum(umbos) / len(umbos)
Example #2
0
def minimize_seeds(nprocs=4):
    """
    Optimize the geometry of every seed in the "seed" directory.  Each seed
    is first optimized with the low level route, then with the high level
    route starting from the low level job.  Seeds whose high level job
    converged are written out through files.write_cml under the name
    "seed/<seed name>_opt".

    **Parameters**

        nprocs: *int, optional*
            How many processors each orca job is submitted with.
    """
    low_route = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    low_extra = ""
    high_route = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    high_extra = ""

    # Read in every seed, remembering its name without the .cml suffix
    seeds, seed_names = [], []
    for entry in os.listdir("seed"):
        contents = files.read_cml("seed/%s" % entry, allow_errors=True, test_charges=False)
        seeds.append(contents[0])
        seed_names.append(entry.split(".cml")[0])

    # Low level pass
    low_jobs = []
    for idx, seed_atoms in enumerate(seeds):
        charge = sum([a.type.charge for a in seed_atoms])
        low_jobs.append(orca.job(
            "seed_%d_low" % idx, low_route,
            atoms=seed_atoms, extra_section=low_extra, charge=charge,
            grad=False, queue=QUEUE_TO_RUN_ON, procs=nprocs, sandbox=False))
    for job in low_jobs:
        job.wait()

    # High level pass, restarting from the matching low level job
    high_jobs = []
    for idx, seed_atoms in enumerate(seeds):
        charge = sum([a.type.charge for a in seed_atoms])
        high_jobs.append(orca.job(
            "seed_%d_high" % idx, high_route,
            atoms=[], extra_section=high_extra, charge=charge,
            grad=False, queue=QUEUE_TO_RUN_ON, procs=nprocs,
            previous="seed_%d_low" % idx, sandbox=False))
    # The low level jobs are waited on a second time here, ahead of the
    # high level ones, exactly as a single cumulative job list would be.
    for job in low_jobs + high_jobs:
        job.wait()

    # Copy the optimized positions back onto the seed molecules
    for idx, seed_name in enumerate(seed_names):
        result = orca.read("seed_%d_high" % idx)
        if not result.converged:
            print("Failed to optimize %s" % seed_name)
            continue
        optimized = result.atoms
        molecules = files.read_cml("seed/%s" % seed_name, allow_errors=True,
                                   test_charges=False, return_molecules=True)
        # Atoms are matched purely by their running position across all
        # molecules of the file.
        cursor = 0
        for molecule in molecules:
            for atom in molecule.atoms:
                source = optimized[cursor]
                atom.x, atom.y, atom.z = source.x, source.y, source.z
                cursor += 1
        files.write_cml(molecules, name="seed/%s_opt" % seed_name)
Example #3
0
def get_corrected_nitromethane(halide, cation, ion="Pb"):
    """
    Return the corrected UMBO for simulations using the nitromethane solvent.

    **Parameters**

        halide: *list, str or str*
            A list of strings specifying what halide combination was used.
            If only one string is passed, then it is assumed to be uniform.
        cation: *str*
            The cation used.
        ion: *str, optional*
            The ion passed on to fpl_utils.reduce_to_name when the name of
            the orca simulation is built.

    **Return**

        UMBO: *float*
            Corrected UMBO, averaged over the bond order entries that
            involve an oxygen.
    """
    prefix = fpl_utils.reduce_to_name(ion, halide, cation)
    result = orca.read(prefix + "_nitromethane_orca_1")

    # Only the bond order entries that involve an oxygen are of interest
    oxygen_entries = []
    for entry in result.MBO:
        elements = [atom.element for atom in entry[0]]
        if "O" in elements:
            oxygen_entries.append(entry)

    # Every such entry is compared against a reference bond order of 1.5
    corrected = [1.5 - entry[1] for entry in oxygen_entries]
    return sum(corrected) / len(oxygen_entries)
Example #4
0
 def mbo(self, criteria=[["O", "C"], ["O", "N"], ["O", "S"]], avg=True):
     """
     Read the bond orders of the orca simulation tied to this object.

     **Parameters**

         criteria: *list, list, str, optional*
             Passed unchanged to fpl_auto.get_mbo_given_criteria.
         avg: *bool, optional*
             Passed unchanged to fpl_auto.get_mbo_given_criteria.

     **Returns**

         mbo:
             None if is_finished() is false, -1 if the orca results that
             were read have not converged, and otherwise whatever
             fpl_auto.get_mbo_given_criteria returns.
     """
     if not self.is_finished():
         return None

     import orca
     import fpl_auto

     # The orca run is named after this object, with "orca" placed as the
     # third underscore separated field.
     pieces = self.name.split("_")
     orca_name = "_".join(pieces[:2] + ["orca"] + pieces[2:])

     results = orca.read(orca_name)
     if not results.converged:
         return -1
     return fpl_auto.get_mbo_given_criteria(results.MBO, criteria, avg)
Example #5
0
def run_high_level(training_sets_folder="training_set",
                   procs=1,
                   queue=None,
                   extra_parameters={}):
    """
    Submit a high level orca job for every numbered cml file in the training
    set folder whose previous job ("ts_<number>") converged.

    **Parameters**

        training_sets_folder: *str, optional*
            Folder holding the "<number>.cml" files.
        procs: *int, optional*
            How many processors each orca job is submitted with.
        queue: *str, optional*
            What queue the orca jobs are submitted to.
        extra_parameters: *dict, optional*
            Passed on to files.read_cml as its extra_parameters argument.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job for each submitted job.
        previous_failed: *list, int*
            The file numbers whose previous job had not converged.
    """
    if not os.path.exists(training_sets_folder):
        raise Exception("No training set folder to run.")

    # File numbers, in ascending order, of every cml file in the folder
    indices = sorted(
        int(fname.split('.cml')[0])
        for fname in os.listdir(training_sets_folder)
        if fname.endswith(".cml"))
    if not indices:
        raise Exception("No viable files in training sets folder.")

    route = "! PW6B95 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    extra_section = ""

    submitted, failed = [], []
    for idx in indices:
        previous_name = "ts_%d" % idx
        atoms = files.read_cml("%s/%d.cml" % (training_sets_folder, idx),
                               allow_errors=True,
                               test_charges=False,
                               return_molecules=False,
                               extra_parameters=extra_parameters)[0]
        charge = sum([a.type.charge for a in atoms])

        # Only build on top of a previous job that converged
        if not orca.read(previous_name).converged:
            failed.append(idx)
            continue

        high_job = orca.job("ts_%d_high" % idx,
                            route,
                            atoms=[],
                            extra_section=extra_section,
                            charge=charge,
                            grad=True,
                            queue=queue,
                            procs=procs,
                            previous=previous_name,
                            sandbox=False)
        submitted.append(high_job)
    return submitted, failed
Example #6
0
def run_high_level():
    """
    Submit a high level orca job for every numbered cml file in the
    "training_sets" folder whose previous job ("ts_<number>") converged.
    Jobs are submitted with the module level QUEUE_TO_RUN_ON and QUEUE_PROCS
    settings.

    **Returns**

        running_jobs: *list*
            The objects returned by orca.job for each submitted job.
        previous_failed: *list, int*
            The file numbers whose previous job had not converged.
    """
    folder = "training_sets"
    if not os.path.exists(folder):
        raise Exception("No training set folder to run.")

    # File numbers, in ascending order, of every cml file in the folder
    indices = sorted(
        int(fname.split('.cml')[0])
        for fname in os.listdir(folder)
        if fname.endswith(".cml"))
    if not indices:
        raise Exception("No viable files in training sets folder.")

    route = "! PW6B95 def2-TZVP GCP(DFT/TZ) ECP{def2-TZVP} Grid7"
    extra_section = ""

    submitted, failed = [], []
    for idx in indices:
        previous_name = "ts_%d" % idx
        atoms = files.read_cml("training_sets/%d.cml" % idx, allow_errors=True,
                               test_charges=False, return_molecules=False)[0]
        charge = sum([a.type.charge for a in atoms])

        # Only build on top of a previous job that converged
        if not orca.read(previous_name).converged:
            failed.append(idx)
            continue

        submitted.append(orca.job(
            "ts_%d_high" % idx, route,
            atoms=[], extra_section=extra_section, charge=charge, grad=True,
            queue=QUEUE_TO_RUN_ON, procs=QUEUE_PROCS,
            previous=previous_name, sandbox=False))
    return submitted, failed
Example #7
0
	vmd = True
# Check if me is forced
# (the "-me" command line flag switches the `me` option on)
if '-me' in sys.argv:
	me = True


# Read in data
# The reader is picked from the requested DFT package.  A run that cannot be
# read (IOError) or an unknown package prints a message and ends the script.
if dft == 'g09':
	try:
		data = g09.read(run_name)
	except IOError:
		print("Error - g09 simulation %s does not exist. Are you sure -dft g09 is correct?" % run_name)
		sys.exit()
elif dft == 'orca':
	try:
		data = orca.read(run_name)
	except IOError:
		print("Error - orca simulation %s does not exist. Are you sure -dft orca is correct?" % run_name)
		sys.exit()
else:
	print("DFT type %s not available..." % dft)
	sys.exit()

# Get the header information
# The header is built up line by line as one string: job name, DFT package
# and the number of energies that were read.
head = 'Job Name: %s\n' % run_name
head += 'DFT Simmulation in %s\n' % dft
head += 'Energy Data Points: %d\n' % len(data.energies)
# With at least three energies, also report the difference between the
# second-to-last and third-to-last ones, passed through
# units.convert_energy with u1 and u2 (presumably from u1 to u2 - confirm
# against the units module) and labelled with u2.
if len(data.energies) > 2:
        Ener = str(units.convert_energy(u1, u2, data.energies[-2] - data.energies[-3]))
        head += 'dE 2nd last = %s %s\n' % (Ener,u2)
if len(data.energies) > 1:
Example #8
0
def minimize_seeds(procs=4, queue=None, extra_parameters={}):
    """
    Optimize the geometry of every seed in the seed directory.  Each seed is
    first optimized with the low level route, then with the high level route
    starting from the low level job.  Each structure whose high level job
    converged is then added to the seed directory under the name
    "previous_name_opt.cml".

    **Parameters**

        procs: *int, optional*
            How many processors to use for this.
        queue: *str, optional*
            What queue to run the simulation on.
        extra_parameters: *dict, optional*
            Passed on to files.read_cml as its extra_parameters argument.
    """
    low_route = "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7"
    low_extra = ""
    high_route = "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7"
    high_extra = ""

    # Read in every seed, remembering its name without the .cml suffix
    seeds, seed_names = [], []
    for entry in os.listdir("seed"):
        contents = files.read_cml("seed/%s" % entry,
                                  allow_errors=True,
                                  test_charges=False,
                                  extra_parameters=extra_parameters)
        seeds.append(contents[0])
        seed_names.append(entry.split(".cml")[0])

    # Low level pass
    low_jobs = []
    for idx, seed_atoms in enumerate(seeds):
        charge = sum([a.type.charge for a in seed_atoms])
        low_jobs.append(
            orca.job("seed_%d_low" % idx,
                     low_route,
                     atoms=seed_atoms,
                     extra_section=low_extra,
                     charge=charge,
                     grad=False,
                     queue=queue,
                     procs=procs,
                     sandbox=False))
    for job in low_jobs:
        job.wait()

    # High level pass, restarting from the matching low level job
    high_jobs = []
    for idx, seed_atoms in enumerate(seeds):
        charge = sum([a.type.charge for a in seed_atoms])
        high_jobs.append(
            orca.job("seed_%d_high" % idx,
                     high_route,
                     atoms=[],
                     extra_section=high_extra,
                     charge=charge,
                     grad=False,
                     queue=queue,
                     procs=procs,
                     previous="seed_%d_low" % idx,
                     sandbox=False))
    # The low level jobs are waited on a second time here, ahead of the
    # high level ones, exactly as a single cumulative job list would be.
    for job in low_jobs + high_jobs:
        job.wait()

    # Copy the optimized positions back onto the seed molecules
    for idx, seed_name in enumerate(seed_names):
        result = orca.read("seed_%d_high" % idx)
        if not result.converged:
            print("Failed to optimize %s" % seed_name)
            continue
        optimized = result.atoms
        molecules = files.read_cml("seed/%s" % seed_name,
                                   allow_errors=True,
                                   test_charges=False,
                                   return_molecules=True,
                                   extra_parameters=extra_parameters)
        # Atoms are matched purely by their running position across all
        # molecules of the file.
        cursor = 0
        for molecule in molecules:
            for atom in molecule.atoms:
                source = optimized[cursor]
                atom.x, atom.y, atom.z = source.x, source.y, source.z
                cursor += 1
        files.write_cml(molecules, name="seed/%s_opt" % seed_name)
Example #9
0
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters={}):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  This is a single LAMMPs data file with each
    training set offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is
    the entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set.  A folder of this name is created
            if needed, and the output files and a copy of the pickle file
            are written into it.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.  A trailing
            ".pickle" is dropped.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is likely
            never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist
            in the default OPLSAA parameter file.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table.

    **Raises**

        Exception
            If the pickle file already exists, or if the atom positions of
            a cml file and its orca output differ by more than 1e-4.
    """
    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang.  Defined once here rather
    # than once per training subset.
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a warning
        # on those that failed
        try:
            result = orca.read("%s/%s/%s.out" %
                               (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad" %
                                      (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule("%s/%s/%s.cml" %
                                         (training_sets_folder, name, name),
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecules by composition, the key being the sorted,
        # space separated element symbols of the atoms
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        if composition not in systems_by_composition:
            systems_by_composition[composition] = []
        systems_by_composition[composition].append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        # Sort so that the lowest energy training subset is first
        # in the system
        systems_by_composition[composition].sort(key=lambda s: s.energy)
        baseline_energy = systems_by_composition[composition][0].energy
        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(systems_by_composition[composition]):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Going from the highest list index down keeps the
    # remaining indices valid while deleting.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to training_sets.xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)
    # Generate our pickle file; the with block closes it even if the dump
    # fails
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    # Always come back to the directory we started in, even if writing the
    # files fails or run_name is more than one folder deep
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
Example #10
0
def generate_lead_halide_cation(halide, cation, ion="Pb", run_opt=True):
    """
    Build the combined ion-halide plus cation system, or load it from the
    cml folder if it was generated before.

    **Parameters**

        halide: *list, str or str*
            The halide combination, passed on to reduce_to_name and
            generate_lead_halide.
        cation: *str*
            The cation; "<cation>.cml" is read from the cml folder.
        ion: *str, optional*
            The ion, passed on to reduce_to_name and generate_lead_halide.
        run_opt: *bool, optional*
            Whether to run an orca geometry optimization on the combined
            system and adopt the resulting positions.

    **Returns**

        system: *Molecule*
            The combined system.
    """
    cml_dir = fpl_constants.cml_dir
    fname = reduce_to_name(ion, halide, cation)
    if not cml_dir.endswith("/"):
        cml_dir += "/"
    cached_cml = cml_dir + fname + ".cml"

    # Reuse a previously generated system when one is on disk
    if os.path.exists(cached_cml):
        print("Found system in cml folder, returning system")
        cached_atoms = files.read_cml(cached_cml,
                                      test_charges=False,
                                      allow_errors=True)[0]
        return structures.Molecule(cached_atoms)

    # Get the PbX3 system
    PbX3 = generate_lead_halide(halide, ion=ion)

    # Get the cation from the cml file
    atoms, bonds, _, _ = files.read_cml(cml_dir + cation + ".cml",
                                        test_charges=False,
                                        allow_errors=True)
    system = structures.Molecule(atoms)

    # Align along X axis
    system.atoms = geometry.align_centroid(system.atoms)[0]

    # A cation with exactly two N (FA, out of the Cs, MA and FA we observe)
    # is kept flat and only translated; anything else is rotated by 90
    # degrees about the y axis.
    # NOTE(review): the stated intent is to move FA to the origin; confirm
    # that translating by the center of mass does that.
    elems = [a.element for a in system.atoms]
    if elems.count("N") != 2:
        system.rotate(geometry.rotation_matrix([0, 1, 0], 90, units="deg"))
    else:
        system.translate(system.get_center_of_mass())

    # If N and C in system, ensure N is below C (closer to Pb); only the
    # first N and the first C are compared
    if "N" in elems and "C" in elems:
        first_n = next(a for a in system.atoms if a.element == "N")
        first_c = next(a for a in system.atoms if a.element == "C")
        if first_n.z > first_c.z:
            # Flip if needed
            system.rotate(
                geometry.rotation_matrix([0, 1, 0], 180, units="deg"))

    # Offset system so lowest point is at 0 in the z dir
    lowest_z = min([a.z for a in system.atoms])
    system.translate([0, 0, lowest_z * -1])

    # Add to the PbX3 system with an offset of the 'vdw_r' entry that
    # PERIODIC_TABLE holds for the ion
    ion_vdw = PERIODIC_TABLE[units.elem_s2i(ion)]['vdw_r']
    system.translate([0, 0, ion_vdw])
    system.atoms += PbX3.atoms

    # Run a geometry optimization of this system
    if run_opt:
        opt_job = orca.job(fname,
                           fpl_constants.default_routes[0],
                           atoms=system.atoms,
                           extra_section=fpl_constants.extra_section,
                           queue="batch",
                           procs=2)
        opt_job.wait()
        optimized = orca.read(fname).atoms
        for atom, source in zip(system.atoms, optimized):
            atom.x, atom.y, atom.z = source.x, source.y, source.z

    # Set OPLS types for the ion and the halides
    typed_elements = [ion, "Cl", "Br", "I"]
    for atom in system.atoms:
        if atom.element not in typed_elements:
            continue
        atom.type = fpl_constants.atom_types[atom.element]
        atom.type_index = atom.type["index"]

    # Write cml file so we don't re-generate, and return system
    files.write_cml(system, bonds=bonds, name=cached_cml)
    return system
Example #11
0
def minimize_seeds(procs=4, queue=None, extra_parameters={}):
    """
    Optimize the geometry of the seeds supplied in the seed directory, in
    two passes: a low level optimization of each seed, followed by a high
    level optimization that starts from the low level job.  Each structure
    whose high level job converged is then added to the seed directory under
    the name "previous_name_opt.cml".

    **Parameters**

        procs: *int, optional*
            How many processors to use for this.
        queue: *str, optional*
            What queue to run the simulation on.
        extra_parameters: *dict, optional*
            Passed on to files.read_cml as its extra_parameters argument.
    """
    routes = {
        "low": "! OPT B97-D3 def2-TZVP ECP{def2-TZVP} Grid7",
        "high": "! OPT PW6B95 def2-TZVP ECP{def2-TZVP} Grid7",
    }
    extra_sections = {"low": "", "high": ""}

    # Read in every seed, remembering its name without the .cml suffix
    seeds = []
    seed_names = []
    for fname in os.listdir("seed"):
        parsed = files.read_cml("seed/%s" % fname,
                                allow_errors=True,
                                test_charges=False,
                                extra_parameters=extra_parameters)
        seeds.append(parsed[0])
        seed_names.append(fname.split(".cml")[0])

    # Low level pass
    low_jobs = []
    for n, seed_atoms in enumerate(seeds):
        charge = sum([a.type.charge for a in seed_atoms])
        low_jobs.append(orca.job("seed_%d_low" % n, routes["low"],
                                 atoms=seed_atoms,
                                 extra_section=extra_sections["low"],
                                 charge=charge,
                                 grad=False,
                                 queue=queue,
                                 procs=procs,
                                 sandbox=False))
    for job in low_jobs:
        job.wait()

    # High level pass, restarting from the matching low level job
    high_jobs = []
    for n, seed_atoms in enumerate(seeds):
        charge = sum([a.type.charge for a in seed_atoms])
        high_jobs.append(orca.job("seed_%d_high" % n, routes["high"],
                                  atoms=[],
                                  extra_section=extra_sections["high"],
                                  charge=charge,
                                  grad=False,
                                  queue=queue,
                                  procs=procs,
                                  previous="seed_%d_low" % n,
                                  sandbox=False))
    # The low level jobs are waited on a second time here, ahead of the
    # high level ones, exactly as a single cumulative job list would be.
    for job in low_jobs + high_jobs:
        job.wait()

    # Copy the optimized positions back onto the seed molecules
    for n, seed_name in enumerate(seed_names):
        outcome = orca.read("seed_%d_high" % n)
        if not outcome.converged:
            print("Failed to optimize %s" % seed_name)
            continue
        optimized = outcome.atoms
        molecules = files.read_cml("seed/%s" % seed_name,
                                   allow_errors=True,
                                   test_charges=False,
                                   return_molecules=True,
                                   extra_parameters=extra_parameters)
        # Atoms are matched purely by their running position across all
        # molecules of the file.
        position = 0
        for molecule in molecules:
            for atom in molecule.atoms:
                source = optimized[position]
                atom.x, atom.y, atom.z = source.x, source.y, source.z
                position += 1
        files.write_cml(molecules, name="seed/%s_opt" % seed_name)
Example #12
0
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters={}):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  This is a single LAMMPs data file with each
    training set offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is
    the entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    **Parameters**

        run_name: *str*
            Name of final training set.  A folder of this name is created
            if needed, and the output files and a copy of the pickle file
            are written into it.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.  A trailing
            ".pickle" is dropped.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is likely
            never to sample them.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist
            in the default OPLSAA parameter file.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table.

    **Raises**

        Exception
            If the pickle file already exists, or if the atom positions of
            a cml file and its orca output differ by more than 1e-4.
    """
    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang.  Defined once here rather
    # than once per training subset.
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a warning
        # on those that failed
        try:
            result = orca.read("%s/%s/%s.out"
                               % (training_sets_folder, name, name))
        except IOError:
            print("Warning - Training Subset %s not included as "
                  "out file not found..." % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad"
                                      % (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print("Warning - Training Subset %s not included as "
                  "results not found..." % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule("%s/%s/%s.cml"
                                         % (training_sets_folder, name, name),
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the molecules by composition, the key being the sorted,
        # space separated element symbols of the atoms
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        if composition not in systems_by_composition:
            systems_by_composition[composition] = []
        systems_by_composition[composition].append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        # Sort so that the lowest energy training subset is first
        # in the system
        systems_by_composition[composition].sort(key=lambda s: s.energy)
        baseline_energy = systems_by_composition[composition][0].energy
        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(systems_by_composition[composition]):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Going from the highest list index down keeps the
    # remaining indices valid while deleting.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print("Warning - Training Subset %s not included as energy "
                  "is too high..." % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to training_sets.xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)
    # Generate our pickle file; the with block closes it even if the dump
    # fails
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    # Always come back to the directory we started in, even if writing the
    # files fails or run_name is more than one folder deep
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name,
                                                     system,
                                                     systems_by_composition)
    finally:
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
Example #13
0
# Check if me is forced
# (the "-me" command line flag switches the `me` option on)
if '-me' in sys.argv:
    me = True

# Read in data
# The reader is picked from the requested DFT package.  A run that cannot be
# read (IOError) or an unknown package prints a message and ends the script.
if dft == 'g09':
    try:
        data = g09.read(run_name)
    except IOError:
        print(
            "Error - g09 simulation %s does not exist. Are you sure -dft g09 is correct?"
            % run_name)
        sys.exit()
elif dft == 'orca':
    try:
        data = orca.read(run_name)
    except IOError:
        print(
            "Error - orca simulation %s does not exist. Are you sure -dft orca is correct?"
            % run_name)
        sys.exit()
else:
    print("DFT type %s not available..." % dft)
    sys.exit()

# Get the header information
# The header is built up line by line as one string: job name, DFT package
# and the number of energies that were read.
head = 'Job Name: %s\n' % run_name
head += 'DFT calculation via %s\n' % dft
head += 'Energy Data Points: %d\n' % len(data.energies)
if len(data.energies) > 2:
    Ener = str(