def opt_with_frequency_flattener(
    cls,
    qchem_command,
    multimode="openmp",
    input_file="mol.qin",
    output_file="mol.qout",
    qclog_file="mol.qclog",
    max_iterations=10,
    max_molecule_perturb_scale=0.3,
    check_connectivity=True,
    linked=True,
    transition_state=False,
    freq_before_opt=False,
    save_final_scratch=False,
    **QCJob_kwargs,
):
    """
    Optimize a structure and calculate vibrational frequencies to check if the
    structure is in a true minimum. If there are an inappropriate number of
    imaginary frequencies (>0 for a minimum-energy structure, >1 for a
    transition-state), attempt to re-calculate using one of two methods:
        - Perturb the geometry based on the imaginary frequencies and
          re-optimize
        - Use the exact Hessian to inform a subsequent optimization
    After each geometry optimization, the frequencies are re-calculated to
    determine if a true minimum (or transition-state) has been found.

    Note: Very small imaginary frequencies (-15cm^-1 < nu < 0) are allowed if
    there is only one more than there should be. In other words, if there is
    one very small imaginary frequency, it is still treated as a minimum, and
    if there is one significant imaginary frequency and one very small
    imaginary frequency, it is still treated as a transition-state.

    This is a generator: it yields one QCJob per calculation and, when
    resumed, reads the output that job wrote before deciding what to run next.

    Args:
        qchem_command (str): Command to run QChem.
        multimode (str): Parallelization scheme, either openmp or mpi.
        input_file (str): Name of the QChem input file.
        output_file (str): Name of the QChem output file.
        qclog_file (str): Name of the QChem log file passed to each QCJob.
        max_iterations (int): Number of perturbation -> optimization ->
            frequency iterations to perform. Defaults to 10.
        max_molecule_perturb_scale (float): The maximum scaled perturbation
            that can be applied to the molecule. Defaults to 0.3.
        check_connectivity (bool): Whether to check differences in
            connectivity introduced by structural perturbation. Defaults to
            True.
        linked (bool): Whether or not to use the linked flattener. If set to
            True (default), then the explicit Hessians from a vibrational
            frequency analysis will be used as the initial Hessian of
            subsequent optimizations. In many cases, this can significantly
            improve optimization efficiency.
        transition_state (bool): If True (default False), use a ts
            optimization (search for a saddle point instead of a minimum)
        freq_before_opt (bool): If True (default False), run a frequency
            calculation before any opt/ts searches to improve understanding
            of the local potential energy surface.
        save_final_scratch (bool): Whether to save full scratch directory
            contents at the end of the flattening. Defaults to False.
        **QCJob_kwargs: Passthrough kwargs to QCJob. See
            :class:`custodian.qchem.jobs.QCJob`.

    Yields:
        QCJob: the next optimization or frequency job to run.

    Raises:
        AssertionError: if the input file is missing, if a frequency
            calculation reports errors, or if the perturbation history is
            inconsistent.
        Exception: if the (unlinked) perturbation strategy cannot make
            progress.
    """
    if not os.path.exists(input_file):
        raise AssertionError("Input file must be present!")

    if transition_state:
        opt_method = "ts"
        # For a saddle point the first mode is the wanted one; perturb along
        # the second.
        perturb_index = 1
    else:
        opt_method = "opt"
        perturb_index = 0

    energy_diff_cutoff = 0.0000001

    orig_input = QCInput.from_file(input_file)
    freq_rem = copy.deepcopy(orig_input.rem)
    freq_rem["job_type"] = "freq"
    opt_rem = copy.deepcopy(orig_input.rem)
    opt_rem["job_type"] = opt_method
    first = True
    energy_history = []

    def _make_job(suffix, backup, **extra_kwargs):
        # All jobs share the command and file names; only the suffix, the
        # backup flag and scratch handling vary.
        return QCJob(
            qchem_command=qchem_command,
            multimode=multimode,
            input_file=input_file,
            output_file=output_file,
            qclog_file=qclog_file,
            suffix=suffix,
            backup=backup,
            **extra_kwargs,
            **QCJob_kwargs,
        )

    def _write_input(molecule, rem, template):
        # Write the next input file, taking every non-rem section from
        # `template`.
        QCInput(
            molecule=molecule,
            rem=rem,
            opt=template.opt,
            pcm=template.pcm,
            solvent=template.solvent,
            smx=template.smx,
            vdw_mode=template.vdw_mode,
            van_der_waals=template.van_der_waals,
        ).write_file(input_file)

    def _sync_scf_algorithm(used_input):
        # The input that was actually run may carry a different scf_algorithm
        # than the template; carry it forward to the following jobs.
        if used_input.rem["scf_algorithm"] != freq_rem["scf_algorithm"]:
            freq_rem["scf_algorithm"] = used_input.rem["scf_algorithm"]
            opt_rem["scf_algorithm"] = used_input.rem["scf_algorithm"]

    if freq_before_opt:
        if not linked:
            warnings.warn(
                "WARNING: This first frequency calculation will not inform subsequent optimization!"
            )
        yield _make_job(".freq_pre", first, save_scratch=True)

        if linked:
            opt_rem["geom_opt_hessian"] = "read"
            opt_rem["scf_guess_always"] = True

        _write_input(orig_input.molecule, opt_rem, orig_input)
        first = False

    if linked:
        opt_rem["geom_opt_hessian"] = "read"
        opt_rem["scf_guess_always"] = True

        for ii in range(max_iterations):
            opt_suffix = ".{}_".format(opt_method) + str(ii)
            yield _make_job(opt_suffix, first, save_scratch=True)
            opt_outdata = QCOutput(output_file + opt_suffix).data
            _sync_scf_algorithm(QCInput.from_file(input_file + opt_suffix))
            first = False
            if (opt_outdata["structure_change"] == "unconnected_fragments"
                    and not opt_outdata["completion"]):
                if not transition_state:
                    warnings.warn(
                        "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                    )
                    break
            energy_history.append(opt_outdata.get("final_energy"))
            _write_input(
                opt_outdata.get("molecule_from_optimized_geometry"),
                freq_rem,
                orig_input,
            )

            freq_suffix = ".freq_" + str(ii)
            yield _make_job(freq_suffix, first, save_scratch=True)
            outdata = QCOutput(output_file + freq_suffix).data
            _sync_scf_algorithm(QCInput.from_file(input_file + freq_suffix))
            errors = outdata.get("errors")
            if len(errors) != 0:
                raise AssertionError(
                    "No errors should be encountered while flattening frequencies!"
                )

            if not transition_state:
                freq_0 = outdata.get("frequencies")[0]
                freq_1 = outdata.get("frequencies")[1]
                if freq_0 > 0.0:
                    warnings.warn("All frequencies positive!")
                    break
                if abs(freq_0) < 15.0 and freq_1 > 0.0:
                    warnings.warn(
                        "One negative frequency smaller than 15.0 - not worth further flattening!"
                    )
                    break
                if len(energy_history) > 1:
                    if abs(energy_history[-1] - energy_history[-2]) < energy_diff_cutoff:
                        warnings.warn("Energy change below cutoff!")
                        break
            else:
                freq_0 = outdata.get("frequencies")[0]
                freq_1 = outdata.get("frequencies")[1]
                freq_2 = outdata.get("frequencies")[2]
                if freq_0 < 0.0 < freq_1:
                    warnings.warn("Saddle point found!")
                    break
                if abs(freq_1) < 15.0 and freq_2 > 0.0:
                    warnings.warn(
                        "Second small imaginary frequency (smaller than 15.0) - not worth further flattening!"
                    )
                    break

            # Not converged yet: re-optimize from the latest geometry.
            _write_input(
                opt_outdata.get("molecule_from_optimized_geometry"),
                opt_rem,
                orig_input,
            )

        if not save_final_scratch:
            shutil.rmtree(os.path.join(os.getcwd(), "scratch"))

    else:
        orig_opt_input = QCInput.from_file(input_file)
        history = []
        # The connectivity check only applies to minimum searches.
        connectivity_checked = check_connectivity and not transition_state

        for ii in range(max_iterations):
            opt_suffix = ".{}_".format(opt_method) + str(ii)
            yield _make_job(opt_suffix, first)
            opt_outdata = QCOutput(output_file + opt_suffix).data
            if ii == 0:
                # Capture the reference data on the first optimization. Keyed
                # on the iteration rather than `first`, which is already False
                # here when freq_before_opt ran a job beforehand.
                orig_species = copy.deepcopy(opt_outdata.get("species"))
                orig_charge = copy.deepcopy(opt_outdata.get("charge"))
                orig_multiplicity = copy.deepcopy(
                    opt_outdata.get("multiplicity"))
                orig_energy = copy.deepcopy(opt_outdata.get("final_energy"))
            first = False
            if (opt_outdata["structure_change"] == "unconnected_fragments"
                    and not opt_outdata["completion"]):
                if not transition_state:
                    warnings.warn(
                        "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                    )
                    break
            _write_input(
                opt_outdata.get("molecule_from_optimized_geometry"),
                freq_rem,
                orig_opt_input,
            )

            freq_suffix = ".freq_" + str(ii)
            yield _make_job(freq_suffix, first)
            outdata = QCOutput(output_file + freq_suffix).data
            errors = outdata.get("errors")
            if len(errors) != 0:
                raise AssertionError(
                    "No errors should be encountered while flattening frequencies!"
                )

            if not transition_state:
                freq_0 = outdata.get("frequencies")[0]
                freq_1 = outdata.get("frequencies")[1]
                if freq_0 > 0.0:
                    warnings.warn("All frequencies positive!")
                    if opt_outdata.get("final_energy") > orig_energy:
                        warnings.warn(
                            "WARNING: Energy increased during frequency flattening!"
                        )
                    break
                if abs(freq_0) < 15.0 and freq_1 > 0.0:
                    warnings.warn(
                        "One negative frequency smaller than 15.0 - not worth further flattening!"
                    )
                    break
                # NOTE(review): energy_history is never appended to in this
                # branch, so this cutoff cannot trigger as written.
                if len(energy_history) > 1:
                    if abs(energy_history[-1] - energy_history[-2]) < energy_diff_cutoff:
                        warnings.warn("Energy change below cutoff!")
                        break
            else:
                freq_0 = outdata.get("frequencies")[0]
                freq_1 = outdata.get("frequencies")[1]
                freq_2 = outdata.get("frequencies")[2]
                if freq_0 < 0.0 < freq_1:
                    warnings.warn("Saddle point found!")
                    break
                if abs(freq_1) < 15.0 and freq_2 > 0.0:
                    warnings.warn(
                        "Second small imaginary frequency (smaller than 15.0) - not worth further flattening!"
                    )
                    break

            # Record this iteration so later iterations can reason about
            # which perturbation directions were already tried.
            hist = {}
            hist["molecule"] = copy.deepcopy(outdata.get("initial_molecule"))
            hist["geometry"] = copy.deepcopy(outdata.get("initial_geometry"))
            hist["frequencies"] = copy.deepcopy(outdata.get("frequencies"))
            hist["frequency_mode_vectors"] = copy.deepcopy(
                outdata.get("frequency_mode_vectors"))
            hist["num_neg_freqs"] = sum(
                1 for freq in outdata.get("frequencies") if freq < 0)
            hist["energy"] = copy.deepcopy(opt_outdata.get("final_energy"))
            hist["index"] = len(history)
            hist["children"] = []
            history.append(hist)

            ref_mol = history[-1]["molecule"]
            geom_to_perturb = history[-1]["geometry"]
            negative_freq_vecs = history[-1]["frequency_mode_vectors"][
                perturb_index]
            reversed_direction = False
            standard = True

            # If we've found one or more negative frequencies in two
            # consecutive iterations, let's dig in deeper:
            if len(history) > 1:
                # Start by finding the latest iteration's parent:
                if history[-1]["index"] in history[-2]["children"]:
                    parent_hist = history[-2]
                    history[-1]["parent"] = parent_hist["index"]
                elif (len(history) > 2
                      and history[-1]["index"] in history[-3]["children"]):
                    # Length guard: with only two entries there is no
                    # grandparent to inspect.
                    parent_hist = history[-3]
                    history[-1]["parent"] = parent_hist["index"]
                else:
                    raise AssertionError(
                        "ERROR: your parent should always be one or two iterations behind you! Exiting..."
                    )

                # if the number of negative frequencies has remained constant
                # or increased from parent to child,
                if history[-1]["num_neg_freqs"] >= parent_hist["num_neg_freqs"]:
                    # check to see if the parent only has one child, aka only
                    # the positive perturbation has been tried, in which case
                    # just try the negative perturbation from the same parent
                    if len(parent_hist["children"]) == 1:
                        ref_mol = parent_hist["molecule"]
                        geom_to_perturb = parent_hist["geometry"]
                        negative_freq_vecs = parent_hist[
                            "frequency_mode_vectors"][perturb_index]
                        reversed_direction = True
                        standard = False
                        parent_hist["children"].append(len(history))
                    # If the parent has two children, aka both directions have
                    # been tried, then we have to get creative:
                    elif len(parent_hist["children"]) == 2:
                        # If we're dealing with just one negative frequency,
                        if parent_hist["num_neg_freqs"] == 1:
                            first_child = history[parent_hist["children"][0]]
                            if first_child["energy"] < history[-1]["energy"]:
                                good_child = copy.deepcopy(first_child)
                            else:
                                good_child = copy.deepcopy(history[-1])
                            if good_child["num_neg_freqs"] > 1:
                                raise Exception(
                                    "ERROR: Child with lower energy has more negative frequencies! "
                                    "Exiting...")
                            if good_child["energy"] < parent_hist["energy"]:
                                make_good_child_next_parent = True
                            elif (vector_list_diff(
                                    good_child["frequency_mode_vectors"][perturb_index],
                                    parent_hist["frequency_mode_vectors"][perturb_index],
                            ) > 0.2):
                                make_good_child_next_parent = True
                            else:
                                raise Exception(
                                    "ERROR: Good child not good enough! Exiting..."
                                )
                            if make_good_child_next_parent:
                                good_child["index"] = len(history)
                                history.append(good_child)
                                ref_mol = history[-1]["molecule"]
                                geom_to_perturb = history[-1]["geometry"]
                                negative_freq_vecs = history[-1][
                                    "frequency_mode_vectors"][perturb_index]
                        else:
                            raise Exception(
                                "ERROR: Can't deal with multiple neg frequencies yet! Exiting..."
                            )
                    else:
                        raise AssertionError(
                            "ERROR: Parent cannot have more than two childen! Exiting..."
                        )

            # Implicitly, if the number of negative frequencies decreased from
            # parent to child, continue normally.
            if standard:
                history[-1]["children"].append(len(history))

            min_molecule_perturb_scale = 0.1
            scale_grid = 10
            perturb_scale_grid = (max_molecule_perturb_scale -
                                  min_molecule_perturb_scale) / scale_grid

            structure_successfully_perturbed = False
            # Try the largest perturbation first and shrink it step by step.
            for molecule_perturb_scale in np.arange(
                    max_molecule_perturb_scale,
                    min_molecule_perturb_scale,
                    -perturb_scale_grid,
            ):
                new_coords = perturb_coordinates(
                    old_coords=geom_to_perturb,
                    negative_freq_vecs=negative_freq_vecs,
                    molecule_perturb_scale=molecule_perturb_scale,
                    reversed_direction=reversed_direction,
                )
                new_molecule = Molecule(
                    species=orig_species,
                    coords=new_coords,
                    charge=orig_charge,
                    spin_multiplicity=orig_multiplicity,
                )
                if not connectivity_checked:
                    # No connectivity check applies, so the first (largest)
                    # perturbation is accepted instead of being rejected
                    # unconditionally.
                    structure_successfully_perturbed = True
                    break
                structure_successfully_perturbed = (
                    check_for_structure_changes(ref_mol, new_molecule) == "no_change")
                if structure_successfully_perturbed:
                    break
            if not structure_successfully_perturbed:
                raise Exception(
                    "ERROR: Unable to perturb coordinates to remove negative frequency without changing "
                    "the connectivity! Exiting...")

            _write_input(new_molecule, opt_rem, orig_opt_input)
def setUpClass(cls):
    """Create the shared C/O/O test molecule and load the model from formation_energy.hdf5."""
    atom_symbols = ["C", "O", "O"]
    atom_positions = [[0, 0, 0], [-1, 0, 0], [1, 0, 0]]
    cls.molecule = Molecule(atom_symbols, atom_positions)

    # The model file is located relative to this test module's directory.
    model_path = os.path.join(
        CWD, "../../../mvl_models/mp-2019.4.1/formation_energy.hdf5")
    cls.model = MEGNetModel.from_file(model_path)
def get_subgraphs_as_molecules(self, use_weights=False):
    """
    Retrieve subgraphs as molecules, useful for extracting
    molecules from periodic crystals.

    Will only return unique molecules, not any duplicates
    present in the crystal (a duplicate defined as an
    isomorphic subgraph).

    The 3x3x3 supercell graph is built once and cached on
    ``self._supercell_sg``; later calls reuse it.

    :param use_weights (bool): If True, only treat subgraphs
        as isomorphic if edges have the same weights. Typically,
        this means molecules will need to have the same bond
        lengths to be defined as duplicates, otherwise bond
        lengths can differ. This is a fairly robust approach,
        but will treat e.g. enantiomers as being duplicates.

    :return: list of unique Molecules in Structure
    """

    # creating a supercell is an easy way to extract
    # molecules (and not, e.g., layers of a 2D crystal)
    # without adding extra logic
    supercell_sg = getattr(self, '_supercell_sg', None)
    if supercell_sg is None:
        # Bind the local name on both paths: previously it was only set when
        # the cache was empty, so a second call raised UnboundLocalError.
        self._supercell_sg = supercell_sg = self * (3, 3, 3)

    # make undirected to find connected subgraphs
    supercell_sg.graph = nx.Graph(supercell_sg.graph)

    # find subgraphs
    # NOTE(review): connected_component_subgraphs was removed in networkx 2.4;
    # confirm the networkx version this project pins.
    all_subgraphs = list(nx.connected_component_subgraphs(supercell_sg.graph))

    # discount subgraphs that lie across *supercell* boundaries
    # these will be subgraphs representing crystals
    molecule_subgraphs = [
        subgraph for subgraph in all_subgraphs
        if not any(d['to_jimage'] != (0, 0, 0)
                   for _, _, d in subgraph.edges(data=True))
    ]

    # add specie names to graph to be able to test for isomorphism
    for subgraph in molecule_subgraphs:
        for n in subgraph:
            subgraph.add_node(n, specie=str(supercell_sg.structure[n].specie))

    # now define how we test for isomorphism
    def node_match(n1, n2):
        return n1['specie'] == n2['specie']

    def edge_match(e1, e2):
        if use_weights:
            return e1['weight'] == e2['weight']
        return True

    # prune duplicate subgraphs
    unique_subgraphs = []
    for subgraph in molecule_subgraphs:
        already_present = any(
            nx.is_isomorphic(subgraph, g,
                             node_match=node_match,
                             edge_match=edge_match)
            for g in unique_subgraphs)
        if not already_present:
            unique_subgraphs.append(subgraph)

    # get Molecule objects for each subgraph
    molecules = []
    for subgraph in unique_subgraphs:
        coords = [supercell_sg.structure[n].coords for n in subgraph.nodes()]
        species = [supercell_sg.structure[n].specie for n in subgraph.nodes()]

        molecule = Molecule(species, coords)

        # shift so origin is at center of mass
        molecule = molecule.get_centered_molecule()

        molecules.append(molecule)

    return molecules
def _parse(self, filename):
    """
    Parse a Gaussian output file line by line and populate this object.

    Parsing runs through three stages: (0) link0 and route section,
    (1) charge and multiplicity, (2) everything else (structures, energies,
    forces, frequencies, Mulliken charges, thermochemical corrections,
    termination status and errors).

    Sets, among others: ``properly_terminated``, ``is_pcm``,
    ``stationary_type``, ``structures``, ``corrections``, ``energies``,
    ``pcm``, ``errors``, ``Mulliken_charges``, ``link0``, ``cart_forces``
    and ``frequencies``.

    Args:
        filename: Path of the output file; opened with ``zopen``.

    Warns:
        If no "Normal termination" / "Error termination" line is found.
    """
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(
        r"^\s*(Mulliken charges|Mulliken atomic charges)")
    mulliken_charge_patt = re.compile(
        r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    forces_on_patt = re.compile(
        r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
    forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
    forces_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

    freq_on_patt = re.compile(
        r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
    freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
    normal_mode_patt = re.compile(
        r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

    # Human-readable labels for the two error markers error_patt recognises.
    error_defs = {
        "! Non-Optimized Parameters !": "Optimization "
                                        "error",
        "Convergence failure": "SCF convergence error"
    }

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}
    self.link0 = {}
    self.cart_forces = []
    self.frequencies = []

    coord_txt = []
    # Counts the dashed separator lines seen inside a "Standard orientation"
    # table; the table is complete when the counter wraps back to 0.
    read_coord = 0
    read_mulliken = False
    orbitals_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False
    parse_forces = False
    forces = []
    parse_freq = False
    frequencies = []

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif link0_patt.match(line):
                    m = link0_patt.match(line)
                    self.link0[m.group(1)] = m.group(2)
                elif route_patt.search(line):
                    params = read_route_line(line)
                    self.functional = params[0]
                    self.basis_set = params[1]
                    self.route = params[2]
                    self.dieze_tag = params[3]
                    parse_stage = 1
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        # strip() removes the characters ' ', 't', 'o' from
                        # both ends (not the substring " to "); kept as-is so
                        # the existing keys do not change.
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            sp = []
                            coords = []
                            # The first two collected lines are table headers.
                            for coord_line in coord_txt[2:]:
                                toks = coord_line.split()
                                sp.append(Element.from_Z(int(toks[1])))
                                coords.append(
                                    [float(i) for i in toks[3:6]])
                            self.structures.append(Molecule(sp, coords))

                if parse_forces:
                    m = forces_patt.search(line)
                    if m:
                        forces.extend(
                            [float(_v) for _v in m.groups()[2:5]])
                    elif forces_off_patt.search(line):
                        self.cart_forces.append(forces)
                        forces = []
                        parse_forces = False

                elif parse_freq:
                    m = freq_patt.search(line)
                    if m:
                        values = [
                            float(_v) for _v in m.groups()[0].split()
                        ]
                        for value in values:
                            frequencies.append([value, []])
                    elif normal_mode_patt.search(line):
                        values = [float(_v) for _v in line.split()[2:]]
                        # Each row holds one atom's xyz displacement for the
                        # last n frequencies that were appended.
                        n = int(len(values) / 3)
                        for i in range(0, len(values), 3):
                            j = -n + int(i / 3)
                            frequencies[j][1].extend(values[i:i + 3])
                    elif line.find("-------------------") != -1:
                        parse_freq = False
                        self.frequencies.append(frequencies)
                        frequencies = []

                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                    terminated = True
                elif error_patt.search(line):
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no .matcher(); use .search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif orbital_patt.search(line):
                    orbitals_txt.append(line)
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = True
                elif not parse_forces and forces_on_patt.search(line):
                    parse_forces = True
                elif freq_on_patt.search(line):
                    parse_freq = True

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        for charge_line in mulliken_txt:
                            m = mulliken_charge_patt.search(charge_line)
                            if m:
                                mulliken_charges[int(m.group(1))] = [
                                    m.group(2), float(m.group(3))
                                ]
                        read_mulliken = False
                        self.Mulliken_charges = mulliken_charges

    if not terminated:
        # Best effort: warn instead of raising on a truncated/bad file.
        warnings.warn("\n" + self.filename +
                      ": Termination error or bad Gaussian output file !")
def test_split(self):
    """Check split_molecule_subgraphs: bond alterations, error on a non-splitting bond, charges, and node remapping."""
    broken_bonds = [(0, 1), (4, 5)]
    bond_alterations = {
        (2, 3): {"weight": 1.0},
        (0, 5): {"weight": 2.0},
        (1, 2): {"weight": 2.0},
        (3, 4): {"weight": 2.0},
    }

    # Perform retro-Diels-Alder reaction - turn product into reactants
    reactants = self.cyclohexene.split_molecule_subgraphs(
        broken_bonds, allow_reverse=True, alterations=bond_alterations)
    self.assertTrue(isinstance(reactants, list))

    by_size = sorted(reactants, key=len)
    # After alterations, reactants should be ethylene and butadiene
    smaller, larger = by_size[0], by_size[1]
    self.assertEqual(smaller, self.ethylene)
    self.assertEqual(larger, self.butadiene)

    # Breaking a single bond must not be accepted as a valid split here
    with self.assertRaises(MolGraphSplitError):
        self.cyclohexene.split_molecule_subgraphs([(0, 1)])

    # Test naive charge redistribution
    hydroxide = Molecule(["O", "H"], [[0, 0, 0], [0.5, 0.5, 0.5]], charge=-1)
    oh_mg = MoleculeGraph.with_empty_graph(hydroxide)
    oh_mg.add_edge(0, 1)

    for fragment in oh_mg.split_molecule_subgraphs([(0, 1)]):
        is_oxygen = str(fragment.molecule[0].specie) == "O"
        expected_charge = -1 if is_oxygen else 0
        self.assertEqual(fragment.molecule.charge, expected_charge)

    # Trying to test to ensure that remapping of nodes to atoms works
    diff_species = Molecule(
        ["C", "I", "Cl", "Br", "F"],
        [
            [0.8314, -0.2682, -0.9102],
            [1.3076, 1.3425, -2.2038],
            [-0.8429, -0.7410, -1.1554],
            [1.9841, -1.7636, -1.2953],
            [1.0098, 0.1231, 0.3916],
        ],
    )

    diff_spec_mg = MoleculeGraph.with_empty_graph(diff_species)
    # Bond the central carbon to each halogen in turn.
    for halogen in (1, 2, 3, 4):
        diff_spec_mg.add_edge(0, halogen)

    for halogen in range(1, 5):
        fragments = diff_spec_mg.split_molecule_subgraphs([(0, halogen)])
        for fragment in fragments:
            node_species = nx.get_node_attributes(fragment.graph, "specie")

            node_count = len(fragment.graph.nodes)
            for node_index in range(node_count):
                site = fragment.molecule[node_index]
                self.assertEqual(node_species[node_index], str(site.specie))
def get_ase_mol(molname):
    """Build the molecule named ``molname`` via ``molecule`` and return it as a ``Molecule``."""
    atoms = molecule(molname)
    cart_coords = atoms.get_positions()
    elements = atoms.get_chemical_symbols()
    converted = Molecule(elements, cart_coords)
    return converted
def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
    """
    Assemble the task document for one calculation directory.

    Args:
        dir_name: Directory containing the calculation files.
        qcinput_files: Mapping of task name -> input file name. If it has
            more entries than ``qcoutput_files``, its "orig" entry is popped
            and parsed as the saved original input.
        qcoutput_files: Mapping of task name -> output file name.
        multirun: If truthy, the files are handed to
            ``process_qchem_multirun``; otherwise each output is processed
            with ``process_qchemrun``.

    Returns:
        dict: the assembled document.

    Raises:
        Exception: any error raised while building the document is logged
            (with the directory name) and re-raised.
    """
    opt_job_types = ("opt", "optimization")
    freq_job_types = ("freq", "frequency")
    try:
        fullpath = os.path.abspath(dir_name)
        d = jsanitize(self.additional_fields, strict=True)
        d["schema"] = {
            "code": "atomate",
            "version": QChemDrone.__version__
        }
        d["dir_name"] = fullpath

        # If a saved "orig" input file is present, parse it in case the error
        # handler made changes to the initial input molecule or rem params,
        # which we might want to filter for later
        if len(qcinput_files) > len(qcoutput_files):
            orig_input = QCInput.from_file(
                os.path.join(dir_name, qcinput_files.pop("orig")))
            d["orig"] = {}
            d["orig"]["molecule"] = orig_input.molecule.as_dict()
            d["orig"]["molecule"]["charge"] = int(
                d["orig"]["molecule"]["charge"])
            d["orig"]["rem"] = orig_input.rem
            d["orig"]["opt"] = orig_input.opt
            d["orig"]["pcm"] = orig_input.pcm
            d["orig"]["solvent"] = orig_input.solvent
            d["orig"]["smx"] = orig_input.smx

        if multirun:
            d["calcs_reversed"] = self.process_qchem_multirun(
                dir_name, qcinput_files, qcoutput_files)
        else:
            d["calcs_reversed"] = [
                self.process_qchemrun(dir_name, taskname,
                                      qcinput_files.get(taskname),
                                      output_filename)
                for taskname, output_filename in qcoutput_files.items()
            ]

        # reverse the calculations data order so newest calc is first
        d["calcs_reversed"].reverse()

        d["structure_change"] = []
        d["warnings"] = {}
        for entry in d["calcs_reversed"]:
            if ("structure_change" in entry
                    and "structure_change" not in d["warnings"]):
                if entry["structure_change"] != "no_change":
                    d["warnings"]["structure_change"] = True
            if "structure_change" in entry:
                d["structure_change"].append(entry["structure_change"])
            for key in entry["warnings"]:
                if key not in d["warnings"]:
                    d["warnings"][key] = True

        # After the reversal, the oldest calculation is last.
        d_calc_init = d["calcs_reversed"][-1]
        d_calc_final = d["calcs_reversed"][0]

        d["input"] = {
            "initial_molecule": d_calc_init["initial_molecule"],
            "job_type": d_calc_init["input"]["rem"]["job_type"]
        }
        d["output"] = {
            "initial_molecule": d_calc_final["initial_molecule"],
            "job_type": d_calc_final["input"]["rem"]["job_type"],
            "mulliken": d_calc_final["Mulliken"][-1]
        }
        if "RESP" in d_calc_final:
            d["output"]["resp"] = d_calc_final["RESP"][-1]
        elif "ESP" in d_calc_final:
            d["output"]["esp"] = d_calc_final["ESP"][-1]

        if d["output"]["job_type"] in opt_job_types:
            if "molecule_from_optimized_geometry" in d_calc_final:
                d["output"]["optimized_molecule"] = d_calc_final[
                    "molecule_from_optimized_geometry"]
                d["output"]["final_energy"] = d_calc_final["final_energy"]
            else:
                d["output"]["final_energy"] = "unstable"
            if d_calc_final["opt_constraint"]:
                d["output"]["constraint"] = [
                    d_calc_final["opt_constraint"][0],
                    float(d_calc_final["opt_constraint"][6])
                ]
        if d["output"]["job_type"] in freq_job_types:
            d["output"]["frequencies"] = d_calc_final["frequencies"]
            d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
            d["output"]["entropy"] = d_calc_final["total_entropy"]
            if d["input"]["job_type"] in opt_job_types:
                # Opt followed by freq: the frequency job's starting geometry
                # is the optimized one, and the energy comes from the
                # calculation just before it.
                d["output"]["optimized_molecule"] = d_calc_final[
                    "initial_molecule"]
                d["output"]["final_energy"] = d["calcs_reversed"][1][
                    "final_energy"]

        opt_trajectory = []
        calcs = copy.deepcopy(d["calcs_reversed"])
        calcs.reverse()
        for calc in calcs:
            job_type = calc["input"]["rem"]["job_type"]
            if job_type in opt_job_types:
                for ii, geom in enumerate(calc["geometries"]):
                    site_properties = {"Mulliken": calc["Mulliken"][ii]}
                    if "RESP" in calc:
                        site_properties["RESP"] = calc["RESP"][ii]
                    mol = Molecule(species=calc["species"],
                                   coords=geom,
                                   charge=calc["charge"],
                                   spin_multiplicity=calc["multiplicity"],
                                   site_properties=site_properties)
                    traj_entry = {"molecule": mol}
                    traj_entry["energy"] = calc["energy_trajectory"][ii]
                    opt_trajectory.append(traj_entry)
        if opt_trajectory:
            d["opt_trajectory"] = opt_trajectory

        if "final_energy" not in d["output"]:
            if d_calc_final["final_energy"] is not None:
                d["output"]["final_energy"] = d_calc_final["final_energy"]
            else:
                # Fall back to the first value of the last entry of the last
                # SCF block.
                d["output"]["final_energy"] = d_calc_final["SCF"][-1][-1][0]

        if d_calc_final["completion"]:
            total_cputime = 0.0
            total_walltime = 0.0
            for calc in d["calcs_reversed"]:
                if calc["walltime"] is not None:
                    total_walltime += calc["walltime"]
                if calc["cputime"] is not None:
                    total_cputime += calc["cputime"]
            d["walltime"] = total_walltime
            d["cputime"] = total_cputime
        else:
            d["walltime"] = None
            d["cputime"] = None

        comp = d["output"]["initial_molecule"].composition
        d["formula_pretty"] = comp.reduced_formula
        d["formula_anonymous"] = comp.anonymized_formula
        d["formula_alphabetical"] = comp.alphabetical_formula
        d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
        if d_calc_final["point_group"] is not None:
            d["pointgroup"] = d_calc_final["point_group"]
        else:
            try:
                d["pointgroup"] = PointGroupAnalyzer(
                    d["output"]["initial_molecule"]).sch_symbol
            except ValueError:
                d["pointgroup"] = "PGA_error"

        bb = BabelMolAdaptor(d["output"]["initial_molecule"])
        pbmol = bb.pybel_mol
        smiles = pbmol.write("smi").split()[0]
        d["smiles"] = smiles

        d["state"] = "successful" if d_calc_final[
            "completion"] else "unsuccessful"
        if "special_run_type" in d:
            if d["special_run_type"] == "frequency_flattener":
                if d["state"] == "successful":
                    orig_num_neg_freq = sum(
                        1 for freq in d["calcs_reversed"][-2]["frequencies"]
                        if freq < 0)
                    orig_energy = d_calc_init["final_energy"]
                    final_num_neg_freq = sum(
                        1 for freq in d_calc_final["frequencies"]
                        if freq < 0)
                    final_energy = d["calcs_reversed"][1]["final_energy"]
                    d["num_frequencies_flattened"] = (
                        orig_num_neg_freq - final_num_neg_freq)
                    # If a negative frequency remains, and it's too large to
                    # ignore, then the flattening was unsuccessful
                    if final_num_neg_freq > 0:
                        if final_num_neg_freq > 1 or abs(
                                d["output"]["frequencies"][0]) >= 15.0:
                            d["state"] = "unsuccessful"
                    if final_energy > orig_energy:
                        d["warnings"]["energy_increased"] = True

        d["last_updated"] = datetime.datetime.utcnow()
        return d

    except Exception:
        logger.error(traceback.format_exc())
        logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                     traceback.format_exc())
        raise
def parse_coords(coord_lines):
    """
    Helper method to parse Gaussian-style coordinates into a Molecule.

    Two passes are made over ``coord_lines``:

    1. Variable definitions of the form ``name value`` / ``name=value`` are
       collected so that Z-matrix entries may refer to them by name
       (optionally with a leading ``-`` to negate the value).
    2. Atom specifications are read until the first blank line. A line is
       treated as Cartesian if it matches ``GaussianInput.xyz_patt`` and as
       a Z-matrix entry if it matches ``GaussianInput.zmat_patt``. Once a
       Z-matrix entry has been seen, every remaining line is treated as a
       Z-matrix entry.

    Args:
        coord_lines: Sequence of strings holding the coordinate section.
            It is iterated twice, so it must be re-iterable (e.g. a list).

    Returns:
        Molecule built from the parsed species and Cartesian coordinates.

    Raises:
        KeyError: If a Z-matrix entry refers to an undefined variable.
        ValueError: If a Z-matrix entry refers to an atom label that has
            not been seen yet.
    """
    # Raw strings keep the regex escapes (\S, \s, \d) out of Python's own
    # string-escape processing.
    var_pattern = re.compile(r"^([A-Za-z]+\S*)[\s=,]+([\d\-\.]+)$")
    tok_splitter = re.compile(r"[,\s]+")

    paras = {}
    for l in coord_lines:
        m = var_pattern.match(l.strip())
        if m:
            paras[m.group(1)] = float(m.group(2))

    species = []
    coords = []
    # Stores whether a Zmatrix format is detected. Once a zmatrix format
    # is detected, it is assumed for the remaining of the parsing.
    zmode = False
    for l in coord_lines:
        l = l.strip()
        if not l:
            break
        xyz_match = None if zmode else GaussianInput.xyz_patt.match(l)
        if xyz_match:
            species.append(xyz_match.group(1))
            toks = tok_splitter.split(l)
            # Lines with more than four tokens carry one extra field
            # between the species and the x, y, z values.
            if len(toks) > 4:
                coords.append([float(i) for i in toks[2:5]])
            else:
                coords.append([float(i) for i in toks[1:4]])
        elif GaussianInput.zmat_patt.match(l):
            zmode = True
            toks = tok_splitter.split(l)
            species.append(toks[0])
            toks.pop(0)
            if len(toks) == 0:
                # First atom of the Z-matrix sits at the origin.
                coords.append(np.array([0, 0, 0]))
            else:
                nn = []
                parameters = []
                # Remaining tokens come in (reference atom, value) pairs.
                while len(toks) > 1:
                    ind = toks.pop(0)
                    data = toks.pop(0)
                    try:
                        nn.append(int(ind))
                    except ValueError:
                        # Reference given as an atom label rather than a
                        # 1-based index.
                        nn.append(species.index(ind) + 1)
                    try:
                        val = float(data)
                        parameters.append(val)
                    except ValueError:
                        # Value given as a (possibly negated) variable name.
                        if data.startswith("-"):
                            parameters.append(-paras[data[1:]])
                        else:
                            parameters.append(paras[data])
                if len(nn) == 1:
                    # Second atom: placed along z at the given bond length.
                    coords.append(np.array([0, 0, parameters[0]]))
                elif len(nn) == 2:
                    # Third atom: bond length + angle.
                    coords1 = coords[nn[0] - 1]
                    coords2 = coords[nn[1] - 1]
                    bl = parameters[0]
                    angle = parameters[1]
                    axis = [0, 1, 0]
                    op = SymmOp.from_origin_axis_angle(
                        coords1, axis, angle, False)
                    coord = op.operate(coords2)
                    vec = coord - coords1
                    coord = vec * bl / np.linalg.norm(vec) + coords1
                    coords.append(coord)
                elif len(nn) == 3:
                    # General atom: bond length + angle + dihedral.
                    coords1 = coords[nn[0] - 1]
                    coords2 = coords[nn[1] - 1]
                    coords3 = coords[nn[2] - 1]
                    bl = parameters[0]
                    angle = parameters[1]
                    dih = parameters[2]
                    v1 = coords3 - coords2
                    v2 = coords1 - coords2
                    axis = np.cross(v1, v2)
                    op = SymmOp.from_origin_axis_angle(
                        coords1, axis, angle, False)
                    coord = op.operate(coords2)
                    v1 = coord - coords1
                    v2 = coords1 - coords2
                    v3 = np.cross(v1, v2)
                    adj = get_angle(v3, axis)
                    axis = coords1 - coords2
                    op = SymmOp.from_origin_axis_angle(
                        coords1, axis, dih - adj, False)
                    coord = op.operate(coord)
                    vec = coord - coords1
                    coord = vec * bl / np.linalg.norm(vec) + coords1
                    coords.append(coord)

    def parse_species(sp_str):
        """
        The species specification can take many forms. E.g.,
        simple integers representing atomic numbers ("8"),
        actual species string ("C") or a labelled species ("C1").
        Sometimes, the species string is also not properly capitalized,
        e.g, ("c1"). This method should take care of these known formats.
        """
        try:
            return int(sp_str)
        except ValueError:
            sp = re.sub(r"\d", "", sp_str)
            return sp.capitalize()

    species = [parse_species(sp) for sp in species]

    return Molecule(species, coords)
def get_subgraphs_as_molecules_all(sg, use_weights=False):
    """
    Extract every isolated molecule from a structure graph.

    Adaptation of
    http://pymatgen.org/_modules/pymatgen/analysis/graphs.html#StructureGraph.get_subgraphs_as_molecules
    for our needs: one Molecule is returned per connected subgraph, with no
    isomorphism-based filtering applied.

    Args:
        sg: structure graph
        use_weights: Accepted for interface compatibility. It has no effect,
            because no isomorphism comparison is performed here.

    Returns:
        list of molecules
    """
    # creating a supercell is an easy way to extract
    # molecules (and not, e.g., layers of a 2D crystal)
    # without adding extra logic
    supercell_sg = sg * (3, 3, 3)

    # make undirected to find connected subgraphs
    supercell_sg.graph = nx.Graph(supercell_sg.graph)
    graph = supercell_sg.graph
    structure = supercell_sg.structure

    molecules = []
    # nx.connected_component_subgraphs was removed in networkx 2.4; building
    # each subgraph from nx.connected_components is the documented
    # replacement and also works on older releases.
    for component in nx.connected_components(graph):
        subgraph = graph.subgraph(component).copy()

        # discount subgraphs that lie across *supercell* boundaries:
        # these are subgraphs representing crystals, not molecules
        intersects_boundary = any(
            d["to_jimage"] != (0, 0, 0)
            for _, _, d in subgraph.edges(data=True)
        )
        if intersects_boundary:
            continue

        # get a Molecule object for this subgraph
        nodes = list(subgraph.nodes())
        coords = [structure[n].coords for n in nodes]
        species = [structure[n].specie for n in nodes]
        molecules.append(Molecule(species, coords))

    return molecules
def cleave_operation():
    """
    Interactively cleave a periodic structure and write the result as POSCAR.

    The structure is obtained from ``readstructure()``. The user then picks
    one of three operations on standard input:

    1. cleave surface: writes ``<h>_<k>_<l>.vasp`` for one Miller index, or
       one file per slab returned by ``generate_all_slabs`` when only a
       maximum index is given.
    2. cleave sphere cluster: writes ``sphere.vasp`` containing the sites
       within a radius of the chosen atom, placed in a cubic box.
    3. cleave shell structure: writes ``shell.vasp`` containing the sites
       whose distance from the chosen atom lies between the start radius and
       start radius + shell thickness, placed in a cubic box.

    Returns:
        None. Results are written to files in the current directory.
        Unsupported input (a Molecule, an unknown choice or an unrecognised
        slab format) prints a message and returns without writing anything.
    """

    def read_reply():
        """Read lines from stdin until a non-empty one is entered."""
        reply = ""
        while reply == "":
            reply = input().strip()
        return reply

    def write_boxed_cluster(sites, vacuum, file_name):
        """Box the selected neighbour sites with vacuum padding and save."""
        coords = [site[0].coords for site in sites]
        species = [site[0].specie for site in sites]
        mol = Molecule(coords=coords, species=species)
        max_dist = np.max(mol.distance_matrix)
        a = b = c = max_dist + vacuum
        box_struct = mol.get_boxed_structure(a, b, c)
        box_struct.to(filename=file_name, fmt='poscar')

    struct = readstructure()
    # Check the structure that was read, not the Structure class itself.
    if isinstance(struct, Molecule):
        print("cleave operation is only supported for periodic structure")
        return

    print('your choice ?')
    print('{} >>> {}'.format('1', 'cleave surface'))
    print('{} >>> {}'.format('2', 'cleave sphere cluster'))
    print('{} >>> {}'.format('3', 'cleave shell structure'))
    wait_sep()
    choice = int(read_reply())

    if choice == 1:
        print(
            " input the miller index, minimum size in angstroms of layers containing atomssupercell"
        )
        print(
            " and Minimize size in angstroms of layers containing vacuum like this:"
        )
        print(' 1 0 0 | 5 | 5')
        print(' it means miller index is [1,0,0]')
        print(" min_slab_size is 5 Ang ")
        print(" min_vacum_size is 5 Ang ")
        print(" or like this : ")
        print(' 2 | 5 | 5')
        print(' it will generate all slab with miller index less than 2')

        def generate_selected_slab(in_str):
            """Write the slab for one explicit Miller index."""
            tmp_list = in_str.split('|')
            miller_index = [int(x) for x in tmp_list[0].strip().split()]
            min_slab_size = float(tmp_list[1])
            min_vac_size = float(tmp_list[2])
            slab = SlabGenerator(struct,
                                 miller_index,
                                 min_slab_size=min_slab_size,
                                 min_vacuum_size=min_vac_size,
                                 lll_reduce=True)
            slab_struct = slab.get_slab()
            slab_struct.sort()
            miller_str = [str(i) for i in miller_index]
            filename = '_'.join(miller_str) + '.vasp'
            slab_struct.to(filename=filename, fmt='POSCAR')

        def generate_all_slab(in_str):
            """Write every slab returned for a maximum Miller index."""
            tmp_list = in_str.split('|')
            max_index = int(tmp_list[0])
            min_slab_size = float(tmp_list[1])
            min_vac_size = float(tmp_list[2])
            slabs = generate_all_slabs(struct,
                                       max_index=max_index,
                                       min_slab_size=min_slab_size,
                                       min_vacuum_size=min_vac_size,
                                       lll_reduce=True)
            for slab_struct in slabs:
                slab_struct.sort()
                miller_str = [str(i) for i in slab_struct.miller_index]
                filename = '_'.join(miller_str) + '.vasp'
                slab_struct.to(filename=filename, fmt='POSCAR')

        wait_sep()
        in_str = read_reply()
        # Three integers before the first '|' select one Miller index; a
        # single integer selects "all slabs up to this index".
        len_para = len(in_str.split('|')[0].split())
        if len_para == 3:
            generate_selected_slab(in_str)
        elif len_para == 1:
            generate_all_slab(in_str)
        else:
            # os._exit() was called without its mandatory status argument
            # here and raised TypeError; bail out like the other error paths.
            print("unknow format")
            return

    elif choice == 2:
        print(
            " input the center atom index, sphere radius and vacuum layer thickness"
        )
        print(' 1 3.5 15')
        print(
            ' it means the sphere will be selected according to the 1st atom')
        print(
            " with the radius equals 5Ang, and vacuum layer thickness is 15 Ang"
        )
        wait_sep()
        para = read_reply().split()
        center_atom = int(para[0]) - 1  # user input is 1-based
        radius = float(para[1])
        vacuum = float(para[2])
        center_coord = struct[center_atom].coords
        sites = struct.get_neighbors_in_shell(center_coord, 0, radius)
        write_boxed_cluster(sites, vacuum, "sphere.vasp")

    elif choice == 3:
        print(
            " input the center atom index, start radius, shell thickness and")
        print(" vacuum layer thickness")
        print(' 1 5 10 15')
        print(
            ' it means the ball shell will be selected according to the 1st atom'
        )
        print(" with the 5< r <15Ang, and vacuum layer thickness is 15 Ang")
        wait_sep()
        para = read_reply().split()
        center_atom = int(para[0]) - 1  # user input is 1-based
        radius = float(para[1])
        shell = float(para[2])
        vacuum = float(para[3])
        center_coord = struct[center_atom].coords
        # get_neighbors_in_shell(origin, r, dr) selects r - dr .. r + dr, so
        # the advertised range [radius, radius + shell] needs the shell's
        # mid-radius and half-thickness.
        half_shell = shell / 2.0
        sites = struct.get_neighbors_in_shell(center_coord,
                                              radius + half_shell,
                                              half_shell)
        write_boxed_cluster(sites, vacuum, "shell.vasp")

    else:
        print("unkown choice")
        return
def _parse(self, filename):
    """
    Parse a Gaussian output file and populate this object's attributes.

    The file is read line by line in three stages: (0) before the
    "Enter ...l101.exe" marker, where the route line is read; (1) until the
    charge/multiplicity line; (2) the remainder, where energies, structures,
    errors, thermochemistry corrections and Mulliken charges are collected.

    Sets ``properly_terminated``, ``is_pcm``, ``stationary_type``,
    ``structures``, ``corrections``, ``energies``, ``pcm``, ``errors`` and
    ``Mulliken_charges``; and, when the corresponding lines are found,
    ``route``, ``functional``, ``basis_set``, ``charge``, ``spin_mult`` and
    ``num_basis_func``.

    Args:
        filename: Path of the output file; opened with ``zopen``.

    Raises:
        IOError: If no "Normal termination" / "Error termination" line is
            found in the file.
    """
    start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
    route_patt = re.compile(r" #[pPnNtT]*.*")
    charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                 r"Multiplicity\s+=\s*(\d+)")
    num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
    pcm_patt = re.compile(r"Polarizable Continuum Model")
    stat_type_patt = re.compile(r"imaginary frequencies")
    scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
    mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
    oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
    termination_patt = re.compile(r"(Normal|Error) termination")
    error_patt = re.compile(
        r"(! Non-Optimized Parameters !|Convergence failure)")
    mulliken_patt = re.compile(r"^\s*Mulliken atomic charges")
    mulliken_charge_patt = re.compile(
        r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
    end_mulliken_patt = re.compile(
        r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
    std_orientation_patt = re.compile(r"Standard orientation")
    end_patt = re.compile(r"--+")
    orbital_patt = re.compile(r"Alpha\s*\S+\s*eigenvalues --(.*)")
    thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                             r"\s+([\d\.-]+)")
    route_method_patt = re.compile(r"(\w+)/([^/]+)")

    # Maps the text captured by error_patt to the reported error name.
    error_defs = {
        "! Non-Optimized Parameters !": "Optimization error",
        "Convergence failure": "SCF convergence error"
    }

    self.properly_terminated = False
    self.is_pcm = False
    self.stationary_type = "Minimum"
    self.structures = []
    self.corrections = {}
    self.energies = []
    self.pcm = None
    self.errors = []
    self.Mulliken_charges = {}

    coord_txt = []
    read_coord = 0
    mulliken_txt = []
    read_mulliken = 0
    orbitals_txt = []
    parse_stage = 0
    num_basis_found = False
    terminated = False

    with zopen(filename) as f:
        for line in f:
            if parse_stage == 0:
                if start_patt.search(line):
                    parse_stage = 1
                elif route_patt.search(line):
                    self.route = {}
                    for tok in line.split():
                        sub_tok = tok.strip().split("=")
                        key = sub_tok[0].upper()
                        self.route[key] = sub_tok[1].upper() \
                            if len(sub_tok) > 1 else ""
                        # A "functional/basis" token names the method.
                        m = route_method_patt.match(key)
                        if m:
                            self.functional = m.group(1)
                            self.basis_set = m.group(2)
            elif parse_stage == 1:
                if charge_mul_patt.search(line):
                    m = charge_mul_patt.search(line)
                    self.charge = int(m.group(1))
                    self.spin_mult = int(m.group(2))
                    parse_stage = 2
            elif parse_stage == 2:

                if self.is_pcm:
                    self._check_pcm(line)

                if "FREQ" in self.route and thermo_patt.search(line):
                    m = thermo_patt.search(line)
                    if m.group(1) == "Zero-point":
                        self.corrections["Zero-point"] = float(m.group(3))
                    else:
                        # str.strip(" to ") removes any leading/trailing
                        # ' ', 't' and 'o' characters, not the word "to".
                        key = m.group(2).strip(" to ")
                        self.corrections[key] = float(m.group(3))

                if read_mulliken:
                    if not end_mulliken_patt.search(line):
                        mulliken_txt.append(line)
                    else:
                        mulliken_charges = {}
                        # A separate loop variable is used so that `line`
                        # still refers to the current file line below.
                        for charge_line in mulliken_txt:
                            cm = mulliken_charge_patt.search(charge_line)
                            if cm:
                                mulliken_charges[int(cm.group(1))] = [
                                    cm.group(2), float(cm.group(3))
                                ]
                        read_mulliken = 0
                        self.Mulliken_charges = mulliken_charges

                if read_coord:
                    if not end_patt.search(line):
                        coord_txt.append(line)
                    else:
                        # read_coord starts at 1 and wraps to 0 on the
                        # third dashed separator, which closes the table.
                        read_coord = (read_coord + 1) % 4
                        if not read_coord:
                            sp = []
                            coords = []
                            for l in coord_txt[2:]:
                                toks = l.split()
                                sp.append(Element.from_Z(int(toks[1])))
                                coords.append(
                                    [float(i) for i in toks[3:6]])
                            self.structures.append(Molecule(sp, coords))
                elif termination_patt.search(line):
                    m = termination_patt.search(line)
                    if m.group(1) == "Normal":
                        self.properly_terminated = True
                    terminated = True
                elif error_patt.search(line):
                    m = error_patt.search(line)
                    self.errors.append(error_defs[m.group(1)])
                elif (not num_basis_found) and \
                        num_basis_func_patt.search(line):
                    m = num_basis_func_patt.search(line)
                    self.num_basis_func = int(m.group(1))
                    num_basis_found = True
                elif (not self.is_pcm) and pcm_patt.search(line):
                    self.is_pcm = True
                    self.pcm = {}
                elif "FREQ" in self.route and "OPT" in self.route and \
                        stat_type_patt.search(line):
                    self.stationary_type = "Saddle"
                elif mp2_patt.search(line):
                    m = mp2_patt.search(line)
                    # Fortran-style exponent ("D") -> Python float syntax.
                    self.energies.append(
                        float(m.group(1).replace("D", "E")))
                elif oniom_patt.search(line):
                    # Compiled patterns have no .matcher(); use .search().
                    m = oniom_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif scf_patt.search(line):
                    m = scf_patt.search(line)
                    self.energies.append(float(m.group(1)))
                elif std_orientation_patt.search(line):
                    coord_txt = []
                    read_coord = 1
                elif orbital_patt.search(line):
                    orbitals_txt.append(line)
                elif mulliken_patt.search(line):
                    mulliken_txt = []
                    read_mulliken = 1

    if not terminated:
        raise IOError("Bad Gaussian output file.")
""" Dummy test systems """ from pymatgen.core import Composition, Structure, Lattice, Molecule from ._data_conversion import to_array from ._inspect import get_param_types DUMMY_OBJECTS = { 'str': 'H2O', 'composition': Composition('H2O'), 'structure': Structure(Lattice.cubic(3.167), ['Mo', 'Mo'], [[0, 0, 0], [0.5, 0.5, 0.5]]), 'molecule': Molecule(['C', 'O'], [[0, 0, 0], [1, 0, 0]]) } def get_describer_dummy_obj(instance): """ For a describers, get a dummy object for transform_one. This relies on the type hint. Args: instance (BaseDescriber): describers instance """ obj_type = getattr(instance, "describer_type", None) if obj_type is not None: return DUMMY_OBJECTS[obj_type.lower()] arg_types = get_param_types(instance.transform_one) arg_type = list(arg_types.values())[0]
def _parse_job(self, output):
    """
    Parse the text of a single NWChem job into a dictionary.

    Args:
        output (str): Output text of one job; it is split on newlines.

    Returns:
        dict with the keys "job_type", "energies", "corrections",
        "molecules", "basis_set", "errors", "has_error" and "frequencies",
        plus one lower-cased key per preamble entry found (e.g. "natoms",
        "charge", "scf_calculation_type"). Energies are converted from Ha
        to eV. "frequencies" is None when no frequency section is present,
        otherwise a list of ``(frequency, [(dx, dy, dz), ...])`` tuples.
    """
    # Raw strings keep the regex escapes out of Python's own string-escape
    # processing.
    energy_patt = re.compile(r"Total \w+ energy\s+=\s+([\.\-\d]+)")

    # In cosmo solvation results; gas phase energy = -152.5044774212
    energy_gas_patt = re.compile(r"gas phase energy\s+=\s+([\.\-\d]+)")

    # In cosmo solvation results; sol phase energy = -152.5044774212
    energy_sol_patt = re.compile(r"sol phase energy\s+=\s+([\.\-\d]+)")

    coord_patt = re.compile(r"\d+\s+(\w+)\s+[\.\-\d]+\s+([\.\-\d]+)\s+"
                            r"([\.\-\d]+)\s+([\.\-\d]+)")
    corrections_patt = re.compile(r"([\w\-]+ correction to \w+)\s+="
                                  r"\s+([\.\-\d]+)")
    preamble_patt = re.compile(r"(No. of atoms|No. of electrons"
                               r"|SCF calculation type|Charge|Spin "
                               r"multiplicity)\s*:\s*(\S+)")
    dashes_patt = re.compile(r"\-+")

    error_defs = {
        "calculations not reaching convergence": "Bad convergence",
        "Calculation failed to converge": "Bad convergence",
        "geom_binvr: #indep variables incorrect": "autoz error",
        "dft optimize failed": "Geometry optimization failed"
    }

    data = {}
    energies = []
    frequencies = None
    corrections = {}
    molecules = []
    species = []
    coords = []
    errors = []
    basis_set = {}
    bset_header = []
    parse_geom = False
    parse_freq = False
    parse_bset = False
    job_type = ""

    for line in output.split("\n"):
        stripped = line.strip()

        for fragment, error_name in error_defs.items():
            if fragment in line:
                errors.append(error_name)

        # Geometry lines are collected independently of the frequency /
        # basis-set / general handling below, so a line seen while
        # parse_geom is set also passes through the following chain.
        if parse_geom:
            if stripped == "Atomic Mass":
                molecules.append(Molecule(species, coords))
                species = []
                coords = []
                parse_geom = False
            else:
                m = coord_patt.search(line)
                if m:
                    species.append(m.group(1).capitalize())
                    coords.append([float(m.group(2)), float(m.group(3)),
                                   float(m.group(4))])

        if parse_freq:
            if len(stripped) == 0:
                # A blank line before any displacement was read is the gap
                # after the header; a later blank line ends the section.
                if len(frequencies[-1][1]) == 0:
                    continue
                else:
                    parse_freq = False
            else:
                vibs = [float(vib) for vib in stripped.split()[1:]]
                num_vibs = len(vibs)
                # Each column belongs to one of the most recent modes.
                for mode, dis in zip(frequencies[-num_vibs:], vibs):
                    mode[1].append(dis)
        elif parse_bset:
            if stripped == "":
                parse_bset = False
            else:
                toks = line.split()
                if toks[0] != "Tag" and not dashes_patt.match(toks[0]):
                    basis_set[toks[0]] = dict(
                        zip(bset_header[1:], toks[1:]))
                elif toks[0] == "Tag":
                    bset_header = toks
                    bset_header.pop(4)
                    bset_header = [h.lower() for h in bset_header]
        else:
            m = energy_patt.search(line)
            if m:
                energies.append(Energy(m.group(1), "Ha").to("eV"))
                continue

            m = energy_gas_patt.search(line)
            if m:
                # Replace the last energy by a dict that also holds the
                # COSMO gas / solution phase values.
                cosmo_scf_energy = energies[-1]
                energies[-1] = dict()
                energies[-1].update({"cosmo scf": cosmo_scf_energy})
                energies[-1].update(
                    {"gas phase": Energy(m.group(1), "Ha").to("eV")})

            m = energy_sol_patt.search(line)
            if m:
                energies[-1].update(
                    {"sol phase": Energy(m.group(1), "Ha").to("eV")})

            m = preamble_patt.search(line)
            if m:
                try:
                    val = int(m.group(2))
                except ValueError:
                    val = m.group(2)
                k = m.group(1).replace("No. of ", "n").replace(" ", "_")
                data[k.lower()] = val
            elif 'Geometry "geometry"' in line:
                parse_geom = True
            elif 'Summary of "ao basis"' in line:
                parse_bset = True
            elif "P.Frequency" in line:
                parse_freq = True
                if not frequencies:
                    frequencies = []
                frequencies.extend(
                    [(float(freq), []) for freq in stripped.split()[1:]])
            elif job_type == "" and stripped.startswith("NWChem"):
                job_type = stripped
                if job_type == "NWChem DFT Module" and \
                        "COSMO solvation results" in output:
                    job_type += " COSMO"
            else:
                m = corrections_patt.search(line)
                if m:
                    corrections[m.group(1)] = FloatWithUnit(
                        m.group(2), "kJ mol^-1").to("eV atom^-1")

    if frequencies:
        for freq, mode in frequencies:
            # Regroup the flat displacement list into consecutive triples.
            mode[:] = zip(*[iter(mode)] * 3)

    data.update({
        "job_type": job_type,
        "energies": energies,
        "corrections": corrections,
        "molecules": molecules,
        "basis_set": basis_set,
        "errors": errors,
        "has_error": len(errors) > 0,
        "frequencies": frequencies
    })

    return data
def from_string(cls, string_input):
    """
    Read an NwInput from a string. Currently tested to work with
    files generated from this class itself.

    Recognised top-level keywords are ``geometry`` (optionally followed by
    a ``symmetry`` line), ``charge``, ``title``, ``basis``, any theory name
    in ``NwTask.theories``, ``task`` and ``memory``. Every other non-blank
    line is stored as a directive. Blank lines are ignored, including
    inside ``geometry`` / ``basis`` / theory blocks.

    Args:
        string_input: string_input to parse.

    Returns:
        NwInput object

    Raises:
        ValueError: If a block is not closed by an ``end`` line, or if the
            input contains no ``geometry`` block.
    """
    directives = []
    tasks = []
    charge = None
    spin_multiplicity = None
    title = None
    basis_set = None
    theory_directives = {}
    geom_options = None
    symmetry_options = None
    memory_options = None
    mol = None

    # A single iterator is shared by the main loop and the block readers,
    # so each line is consumed exactly once without list.pop(0) shifting.
    pending = iter(string_input.strip().split("\n"))

    def next_content_line():
        """Return the next non-blank line, stripped."""
        for raw in pending:
            stripped = raw.strip()
            if stripped:
                return stripped
        raise ValueError(
            "Unexpected end of input: a block is missing its 'end' line.")

    for raw in pending:
        l = raw.strip()
        if l == "":
            continue

        toks = l.split()
        keyword = toks[0].lower()
        if keyword == "geometry":
            geom_options = toks[1:]
            l = next_content_line()
            toks = l.split()
            if toks[0].lower() == "symmetry":
                symmetry_options = toks[1:]
                l = next_content_line()
            # Parse geometry
            species = []
            coords = []
            while l.lower() != "end":
                toks = l.split()
                species.append(toks[0])
                coords.append([float(i) for i in toks[1:]])
                l = next_content_line()
            mol = Molecule(species, coords)
        elif keyword == "charge":
            charge = int(toks[1])
        elif keyword == "title":
            title = l[5:].strip().strip("\"")
        elif keyword == "basis":
            # Parse basis sets
            l = next_content_line()
            basis_set = {}
            while l.lower() != "end":
                toks = l.split()
                basis_set[toks[0]] = toks[-1].strip("\"")
                l = next_content_line()
        elif keyword in NwTask.theories:
            # Parse theory directives.
            theory = keyword
            l = next_content_line()
            theory_directives[theory] = {}
            while l.lower() != "end":
                toks = l.split()
                theory_directives[theory][toks[0]] = toks[-1]
                if toks[0] == "mult":
                    spin_multiplicity = float(toks[1])
                l = next_content_line()
        elif keyword == "task":
            # A task picks up whatever charge, title, basis set and theory
            # directives have been read so far.
            tasks.append(
                NwTask(charge=charge,
                       spin_multiplicity=spin_multiplicity,
                       title=title, theory=toks[1],
                       operation=toks[2], basis_set=basis_set,
                       theory_directives=theory_directives.get(toks[1])))
        elif keyword == "memory":
            memory_options = ' '.join(toks[1:])
        else:
            directives.append(l.strip().split())

    if mol is None:
        raise ValueError("No geometry block found in the input string.")

    return NwInput(mol, tasks=tasks, directives=directives,
                   geometry_options=geom_options,
                   symmetry_options=symmetry_options,
                   memory_options=memory_options)
def opt_with_frequency_flattener(cls,
                                 qchem_command,
                                 multimode="openmp",
                                 input_file="mol.qin",
                                 output_file="mol.qout",
                                 qclog_file="mol.qclog",
                                 max_iterations=10,
                                 max_molecule_perturb_scale=0.3,
                                 check_connectivity=True,
                                 **QCJob_kwargs):
    """
    Optimize a structure and calculate vibrational frequencies to check if the
    structure is in a true minima. If a frequency is negative, iteratively
    perturb the geometry, optimize, and recalculate frequencies until all are
    positive, aka a true minima has been found.

    If two consecutive iterations give the same number of negative
    frequencies, the perturbation direction is reversed for the next
    iteration; if that does not change the count either, an exception is
    raised.

    Args:
        qchem_command (str): Command to run QChem.
        multimode (str): Parallelization scheme, either openmp or mpi.
        input_file (str): Name of the QChem input file.
        output_file (str): Name of the QChem output file.
        qclog_file (str): Name of the file passed to QCJob as qclog_file.
        max_iterations (int): Number of perturbation -> optimization -> frequency
            iterations to perform. Defaults to 10.
        max_molecule_perturb_scale (float): The maximum scaled perturbation that
            can be applied to the molecule. Defaults to 0.3.
        check_connectivity (bool): Whether to check differences in connectivity
            introduced by structural perturbation. Defaults to True. When
            False, the largest perturbation is accepted without comparison.
        **QCJob_kwargs: Passthrough kwargs to QCJob. See
            :class:`custodian.qchem.jobs.QCJob`.

    Yields:
        QCJob: alternating optimization (suffix ".opt_<i>") and frequency
        (suffix ".freq_<i>") jobs.

    Raises:
        AssertionError: If the input file does not exist, or if a frequency
            calculation reports errors.
        Exception: If reversing the perturbation direction did not change
            the number of negative frequencies, or if no perturbation
            preserving the connectivity could be found.
    """

    min_molecule_perturb_scale = 0.1
    scale_grid = 10
    perturb_scale_grid = (max_molecule_perturb_scale -
                          min_molecule_perturb_scale) / scale_grid

    if not os.path.exists(input_file):
        raise AssertionError('Input file must be present!')
    orig_opt_input = QCInput.from_file(input_file)
    orig_opt_rem = copy.deepcopy(orig_opt_input.rem)
    orig_freq_rem = copy.deepcopy(orig_opt_input.rem)
    orig_freq_rem["job_type"] = "freq"
    first = True
    reversed_direction = False
    num_neg_freqs = []

    for ii in range(max_iterations):
        yield (QCJob(qchem_command=qchem_command,
                     multimode=multimode,
                     input_file=input_file,
                     output_file=output_file,
                     qclog_file=qclog_file,
                     suffix=".opt_" + str(ii),
                     backup=first,
                     **QCJob_kwargs))
        first = False
        opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
        if opt_outdata["structure_change"] == "unconnected_fragments" \
                and not opt_outdata["completion"]:
            print(
                "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
            )
            break

        freq_QCInput = QCInput(
            molecule=opt_outdata.get("molecule_from_optimized_geometry"),
            rem=orig_freq_rem,
            opt=orig_opt_input.opt,
            pcm=orig_opt_input.pcm,
            solvent=orig_opt_input.solvent)
        freq_QCInput.write_file(input_file)
        yield (QCJob(qchem_command=qchem_command,
                     multimode=multimode,
                     input_file=input_file,
                     output_file=output_file,
                     qclog_file=qclog_file,
                     suffix=".freq_" + str(ii),
                     backup=first,
                     **QCJob_kwargs))
        outdata = QCOutput(output_file + ".freq_" + str(ii)).data
        errors = outdata.get("errors")
        if len(errors) != 0:
            raise AssertionError(
                'No errors should be encountered while flattening frequencies!'
            )
        if outdata.get('frequencies')[0] > 0.0:
            print("All frequencies positive!")
            break

        num_neg_freqs += [
            sum(1 for freq in outdata.get('frequencies') if freq < 0)
        ]
        if len(num_neg_freqs) > 1:
            unchanged = num_neg_freqs[-1] == num_neg_freqs[-2]
            if unchanged and not reversed_direction:
                # No progress: try perturbing the other way next.
                reversed_direction = True
            elif unchanged and reversed_direction:
                if len(num_neg_freqs) < 3:
                    raise AssertionError(
                        "ERROR: This should only be possible after at least three frequency flattening iterations! Exiting..."
                    )
                else:
                    raise Exception(
                        "ERROR: Reversing the perturbation direction still could not flatten any frequencies. Exiting..."
                    )
            elif not unchanged and reversed_direction:
                reversed_direction = False

        negative_freq_vecs = outdata.get("frequency_mode_vectors")[0]
        structure_successfully_perturbed = False

        # The reference graph does not depend on the perturbation scale,
        # so it is built once instead of once per trial scale.
        if check_connectivity:
            old_molgraph = MoleculeGraph.with_local_env_strategy(
                outdata.get("initial_molecule"),
                OpenBabelNN(),
                reorder=False,
                extend_structure=False)

        # Try the largest perturbation first and shrink it until the
        # connectivity is preserved.
        for molecule_perturb_scale in np.arange(
                max_molecule_perturb_scale, min_molecule_perturb_scale,
                -perturb_scale_grid):
            new_coords = perturb_coordinates(
                old_coords=outdata.get("initial_geometry"),
                negative_freq_vecs=negative_freq_vecs,
                molecule_perturb_scale=molecule_perturb_scale,
                reversed_direction=reversed_direction)
            new_molecule = Molecule(
                species=outdata.get('species'),
                coords=new_coords,
                charge=outdata.get('charge'),
                spin_multiplicity=outdata.get('multiplicity'))
            if check_connectivity:
                new_molgraph = MoleculeGraph.with_local_env_strategy(
                    new_molecule,
                    OpenBabelNN(),
                    reorder=False,
                    extend_structure=False)
                if old_molgraph.isomorphic_to(new_molgraph):
                    structure_successfully_perturbed = True
                    break
            else:
                # Without a connectivity check every perturbation is
                # acceptable. Previously this case was never marked as
                # successful, so the function always raised below.
                structure_successfully_perturbed = True
                break
        if not structure_successfully_perturbed:
            raise Exception(
                "ERROR: Unable to perturb coordinates to remove negative frequency without changing the connectivity! Exiting..."
            )

        new_opt_QCInput = QCInput(molecule=new_molecule,
                                  rem=orig_opt_rem,
                                  opt=orig_opt_input.opt,
                                  pcm=orig_opt_input.pcm,
                                  solvent=orig_opt_input.solvent)
        new_opt_QCInput.write_file(input_file)
def opt_with_frequency_flattener(cls,
                                 qchem_command,
                                 multimode="openmp",
                                 input_file="mol.qin",
                                 output_file="mol.qout",
                                 qclog_file="mol.qclog",
                                 max_iterations=10,
                                 max_molecule_perturb_scale=0.3,
                                 reversed_direction=False,
                                 ignore_connectivity=False,
                                 **QCJob_kwargs):
    """
    Alternate geometry optimizations and frequency calculations until the
    lowest frequency is positive, i.e. a true minimum has been reached.

    After every frequency job whose lowest frequency is not positive, the
    geometry is displaced along the first frequency mode vector and written
    back to the input file for the next optimization.

    Args:
        qchem_command (str): Command to run QChem.
        multimode (str): Parallelization scheme, either openmp or mpi.
        input_file (str): Name of the QChem input file.
        output_file (str): Name of the QChem output file.
        qclog_file (str): Name of the file passed to QCJob as qclog_file.
        max_iterations (int): Number of perturbation -> optimization ->
            frequency iterations to perform. Defaults to 10.
        max_molecule_perturb_scale (float): The maximum scaled perturbation
            that can be applied to the molecule. Defaults to 0.3.
        reversed_direction (bool): Whether to reverse the direction of the
            vibrational frequency vectors. Defaults to False.
        ignore_connectivity (bool): Whether to ignore differences in
            connectivity introduced by structural perturbation. Defaults to
            False.
        **QCJob_kwargs: Passthrough kwargs to QCJob. See
            :class:`custodian.qchem.jobs.QCJob`.

    Yields:
        QCJob: optimization jobs (suffix ".opt_<i>") alternating with
        frequency jobs (suffix ".freq_<i>").
    """
    lowest_scale = 0.1
    n_scale_steps = 10
    scale_step = (max_molecule_perturb_scale - lowest_scale) / n_scale_steps
    comparator = MoleculeStructureComparator()

    if not os.path.exists(input_file):
        raise AssertionError('Input file must be present!')

    template = QCInput.from_file(input_file)
    opt_rem = copy.deepcopy(template.rem)
    freq_rem = copy.deepcopy(template.rem)
    freq_rem["job_type"] = "freq"

    def make_job(suffix, backup):
        """Build a QCJob that differs only in suffix and backup flag."""
        return QCJob(qchem_command=qchem_command,
                     multimode=multimode,
                     input_file=input_file,
                     output_file=output_file,
                     qclog_file=qclog_file,
                     suffix=suffix,
                     backup=backup,
                     **QCJob_kwargs)

    def make_input(molecule, rem):
        """Build a QCInput reusing the template's opt/pcm/solvent sections."""
        return QCInput(molecule=molecule,
                       rem=rem,
                       opt=template.opt,
                       pcm=template.pcm,
                       solvent=template.solvent)

    for cycle in range(max_iterations):
        opt_suffix = ".opt_" + str(cycle)
        freq_suffix = ".freq_" + str(cycle)

        # Only the very first job of the whole run requests a backup.
        yield make_job(opt_suffix, cycle == 0)

        opt_data = QCOutput(output_file + opt_suffix).data
        if opt_data["structure_change"] == "unconnected_fragments":
            print(
                "Unstable molecule broke into unconnected fragments! Exiting..."
            )
            break

        optimized = opt_data.get("molecule_from_optimized_geometry")
        make_input(optimized, freq_rem).write_file(input_file)
        yield make_job(freq_suffix, False)

        freq_data = QCOutput(output_file + freq_suffix).data
        if len(freq_data.get("errors")) != 0:
            raise AssertionError(
                'No errors should be encountered while flattening frequencies!'
            )
        if freq_data.get('frequencies')[0] > 0.0:
            print("All frequencies positive!")
            break

        mode_vecs = freq_data.get("frequency_mode_vectors")[0]
        start_coords = freq_data.get("initial_geometry")
        start_molecule = freq_data.get("initial_molecule")

        # Walk from the largest allowed perturbation down towards the
        # smallest until one is accepted.
        accepted = False
        for scale in np.arange(max_molecule_perturb_scale, lowest_scale,
                               -scale_step):
            trial_coords = perturb_coordinates(
                old_coords=start_coords,
                negative_freq_vecs=mode_vecs,
                molecule_perturb_scale=scale,
                reversed_direction=reversed_direction)
            trial_molecule = Molecule(
                species=freq_data.get('species'),
                coords=trial_coords,
                charge=freq_data.get('charge'),
                spin_multiplicity=freq_data.get('multiplicity'))
            if comparator.are_equal(start_molecule,
                                    trial_molecule) or ignore_connectivity:
                accepted = True
                break
        if not accepted:
            raise Exception(
                "Unable to perturb coordinates to remove negative frequency without changing the bonding structure"
            )

        make_input(trial_molecule, opt_rem).write_file(input_file)
def opt_with_frequency_flattener(cls,
                                 qchem_command,
                                 multimode="openmp",
                                 input_file="mol.qin",
                                 output_file="mol.qout",
                                 qclog_file="mol.qclog",
                                 max_iterations=10,
                                 max_molecule_perturb_scale=0.3,
                                 check_connectivity=True,
                                 linked=True,
                                 **QCJob_kwargs):
    """
    Optimize a structure and calculate vibrational frequencies to check if the
    structure is in a true minima. If a frequency is negative, iteratively
    re-optimize and recalculate frequencies until all are positive, aka a
    true minima has been found.

    This is a generator: it yields one QCJob per optimization and one per
    frequency calculation. After each yield it reads the corresponding output
    (``output_file + ".opt_<i>"`` / ``output_file + ".freq_<i>"``), so the
    consumer must run the yielded job before asking for the next one.

    Two strategies are implemented:

    - ``linked=True``: every job shares the scratch directory
      "chain_scratch" in the current working directory, and follow-up
      optimizations are written with ``geom_opt_hessian = "read"`` and
      ``scf_guess_always = True``. Iteration stops when the first frequency is
      positive, when the first frequency has magnitude below 15.0 and the
      second is positive, or when the last two optimization energies differ
      by less than 1e-7. "chain_scratch" is removed once the loop finishes.
    - ``linked=False``: the geometry is displaced along the first frequency
      mode vector (tried in the forward and then the reversed direction) and
      re-optimized, keeping a history of attempts to choose the next
      geometry to perturb.

    In both strategies the loop also stops if an optimization did not
    complete and its output reports "unconnected_fragments".

    Args:
        qchem_command (str): Command to run QChem.
        multimode (str): Parallelization scheme, either openmp or mpi.
        input_file (str): Name of the QChem input file. It is overwritten
            with the input of each follow-up calculation.
        output_file (str): Name of the QChem output file.
        qclog_file (str): Name of the QChem log file, passed through to QCJob.
        max_iterations (int): Number of perturbation -> optimization -> frequency
            iterations to perform. Defaults to 10.
        max_molecule_perturb_scale (float): The maximum scaled perturbation that
            can be applied to the molecule. Defaults to 0.3.
        check_connectivity (bool): Whether to check differences in connectivity
            introduced by structural perturbation. Only used when ``linked``
            is False. If False, the largest perturbation is accepted without
            any check. Defaults to True.
        linked (bool): Whether to use the linked strategy described above
            instead of geometry perturbation. Defaults to True.
        **QCJob_kwargs: Passthrough kwargs to QCJob. See
            :class:`custodian.qchem.jobs.QCJob`.

    Yields:
        QCJob: The next optimization or frequency job to run.

    Raises:
        AssertionError: If the input file is missing, if a frequency output
            reports errors, or if the perturbation history is inconsistent.
        Exception: If no acceptable perturbation or follow-up geometry can be
            found in the unlinked strategy.
    """
    if not os.path.exists(input_file):
        raise AssertionError("Input file must be present!")

    if linked:
        energy_diff_cutoff = 0.0000001

        orig_input = QCInput.from_file(input_file)
        freq_rem = copy.deepcopy(orig_input.rem)
        freq_rem["job_type"] = "freq"
        opt_rem = copy.deepcopy(orig_input.rem)
        opt_rem["geom_opt_hessian"] = "read"
        opt_rem["scf_guess_always"] = True
        first = True
        energy_history = []

        for ii in range(max_iterations):
            yield QCJob(qchem_command=qchem_command,
                        multimode=multimode,
                        input_file=input_file,
                        output_file=output_file,
                        qclog_file=qclog_file,
                        suffix=".opt_" + str(ii),
                        scratch_dir=os.getcwd(),
                        save_scratch=True,
                        save_name="chain_scratch",
                        backup=first,
                        **QCJob_kwargs)
            opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
            # Only the very first job is asked to back up the original files.
            first = False
            if (opt_outdata["structure_change"] == "unconnected_fragments"
                    and not opt_outdata["completion"]):
                print(
                    "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                )
                break

            energy_history.append(opt_outdata.get("final_energy"))
            freq_QCInput = QCInput(
                molecule=opt_outdata.get("molecule_from_optimized_geometry"),
                rem=freq_rem,
                opt=orig_input.opt,
                pcm=orig_input.pcm,
                solvent=orig_input.solvent,
                smx=orig_input.smx,
            )
            freq_QCInput.write_file(input_file)
            yield QCJob(qchem_command=qchem_command,
                        multimode=multimode,
                        input_file=input_file,
                        output_file=output_file,
                        qclog_file=qclog_file,
                        suffix=".freq_" + str(ii),
                        scratch_dir=os.getcwd(),
                        save_scratch=True,
                        save_name="chain_scratch",
                        backup=first,
                        **QCJob_kwargs)
            outdata = QCOutput(output_file + ".freq_" + str(ii)).data
            errors = outdata.get("errors")
            if len(errors) != 0:
                raise AssertionError(
                    "No errors should be encountered while flattening frequencies!"
                )

            # NOTE(review): only the first one or two entries are inspected,
            # which presumes the frequencies are sorted ascending - confirm.
            frequencies = outdata.get("frequencies")
            if frequencies[0] > 0.0:
                print("All frequencies positive!")
                break
            if abs(frequencies[0]) < 15.0 and frequencies[1] > 0.0:
                print(
                    "One negative frequency smaller than 15.0 - not worth further flattening!"
                )
                break
            if (len(energy_history) > 1
                    and abs(energy_history[-1] - energy_history[-2]) <
                    energy_diff_cutoff):
                print("Energy change below cutoff!")
                break

            # Re-optimize from the same geometry, now reading the Hessian.
            opt_QCInput = QCInput(
                molecule=opt_outdata.get("molecule_from_optimized_geometry"),
                rem=opt_rem,
                opt=orig_input.opt,
                pcm=orig_input.pcm,
                solvent=orig_input.solvent,
                smx=orig_input.smx,
            )
            opt_QCInput.write_file(input_file)

        chain_scratch = os.path.join(os.getcwd(), "chain_scratch")
        if os.path.exists(chain_scratch):
            shutil.rmtree(chain_scratch)

    else:
        orig_opt_input = QCInput.from_file(input_file)
        orig_opt_rem = copy.deepcopy(orig_opt_input.rem)
        orig_freq_rem = copy.deepcopy(orig_opt_input.rem)
        orig_freq_rem["job_type"] = "freq"
        first = True
        history = []

        # Perturbation scales are scanned from the maximum down towards this
        # floor in `scale_grid` equal steps; these do not change per iteration.
        min_molecule_perturb_scale = 0.1
        scale_grid = 10
        perturb_scale_grid = (max_molecule_perturb_scale -
                              min_molecule_perturb_scale) / scale_grid

        for ii in range(max_iterations):
            yield QCJob(qchem_command=qchem_command,
                        multimode=multimode,
                        input_file=input_file,
                        output_file=output_file,
                        qclog_file=qclog_file,
                        suffix=".opt_" + str(ii),
                        backup=first,
                        **QCJob_kwargs)
            opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
            if first:
                # Remember the identity of the molecule and the energy of the
                # first optimization for later perturbed inputs / comparison.
                orig_species = copy.deepcopy(opt_outdata.get("species"))
                orig_charge = copy.deepcopy(opt_outdata.get("charge"))
                orig_multiplicity = copy.deepcopy(
                    opt_outdata.get("multiplicity"))
                orig_energy = copy.deepcopy(opt_outdata.get("final_energy"))
            first = False
            if (opt_outdata["structure_change"] == "unconnected_fragments"
                    and not opt_outdata["completion"]):
                print(
                    "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                )
                break

            freq_QCInput = QCInput(
                molecule=opt_outdata.get("molecule_from_optimized_geometry"),
                rem=orig_freq_rem,
                opt=orig_opt_input.opt,
                pcm=orig_opt_input.pcm,
                solvent=orig_opt_input.solvent,
                smx=orig_opt_input.smx,
            )
            freq_QCInput.write_file(input_file)
            yield QCJob(qchem_command=qchem_command,
                        multimode=multimode,
                        input_file=input_file,
                        output_file=output_file,
                        qclog_file=qclog_file,
                        suffix=".freq_" + str(ii),
                        backup=first,
                        **QCJob_kwargs)
            outdata = QCOutput(output_file + ".freq_" + str(ii)).data
            errors = outdata.get("errors")
            if len(errors) != 0:
                raise AssertionError(
                    "No errors should be encountered while flattening frequencies!"
                )
            if outdata.get("frequencies")[0] > 0.0:
                print("All frequencies positive!")
                if opt_outdata.get("final_energy") > orig_energy:
                    print(
                        "WARNING: Energy increased during frequency flattening!"
                    )
                break

            # Record this attempt so later iterations can find their parent.
            hist = {
                "molecule": copy.deepcopy(outdata.get("initial_molecule")),
                "geometry": copy.deepcopy(outdata.get("initial_geometry")),
                "frequencies": copy.deepcopy(outdata.get("frequencies")),
                "frequency_mode_vectors": copy.deepcopy(
                    outdata.get("frequency_mode_vectors")),
                "num_neg_freqs": sum(
                    1 for freq in outdata.get("frequencies") if freq < 0),
                "energy": copy.deepcopy(opt_outdata.get("final_energy")),
                "index": len(history),
                "children": [],
            }
            history.append(hist)

            ref_mol = history[-1]["molecule"]
            geom_to_perturb = history[-1]["geometry"]
            negative_freq_vecs = history[-1]["frequency_mode_vectors"][0]
            reversed_direction = False
            standard = True

            # If we've found one or more negative frequencies in two
            # consecutive iterations, let's dig in deeper:
            if len(history) > 1:
                # Start by finding the latest iteration's parent:
                if history[-1]["index"] in history[-2]["children"]:
                    parent_hist = history[-2]
                    history[-1]["parent"] = parent_hist["index"]
                elif (len(history) > 2
                      and history[-1]["index"] in history[-3]["children"]):
                    # The length guard keeps a two-entry history from hitting
                    # an IndexError instead of the intended error below.
                    parent_hist = history[-3]
                    history[-1]["parent"] = parent_hist["index"]
                else:
                    raise AssertionError(
                        "ERROR: your parent should always be one or two iterations behind you! Exiting..."
                    )

                # if the number of negative frequencies has remained constant
                # or increased from parent to child,
                if (history[-1]["num_neg_freqs"] >=
                        parent_hist["num_neg_freqs"]):
                    # check to see if the parent only has one child, aka only
                    # the positive perturbation has been tried, in which case
                    # just try the negative perturbation from the same parent
                    if len(parent_hist["children"]) == 1:
                        ref_mol = parent_hist["molecule"]
                        geom_to_perturb = parent_hist["geometry"]
                        negative_freq_vecs = parent_hist[
                            "frequency_mode_vectors"][0]
                        reversed_direction = True
                        standard = False
                        parent_hist["children"].append(len(history))
                    # If the parent has two children, aka both directions have
                    # been tried, then we have to get creative:
                    elif len(parent_hist["children"]) == 2:
                        # If we're dealing with just one negative frequency,
                        if parent_hist["num_neg_freqs"] == 1:
                            make_good_child_next_parent = False
                            first_child = history[parent_hist["children"][0]]
                            if first_child["energy"] < history[-1]["energy"]:
                                good_child = copy.deepcopy(first_child)
                            else:
                                good_child = copy.deepcopy(history[-1])
                            if good_child["num_neg_freqs"] > 1:
                                raise Exception(
                                    "ERROR: Child with lower energy has more negative frequencies! "
                                    "Exiting...")
                            elif good_child["energy"] < parent_hist["energy"]:
                                make_good_child_next_parent = True
                            elif (vector_list_diff(
                                    good_child["frequency_mode_vectors"][0],
                                    parent_hist["frequency_mode_vectors"][0],
                            ) > 0.2):
                                make_good_child_next_parent = True
                            else:
                                raise Exception(
                                    "ERROR: Good child not good enough! Exiting..."
                                )
                            if make_good_child_next_parent:
                                good_child["index"] = len(history)
                                history.append(good_child)
                                ref_mol = history[-1]["molecule"]
                                geom_to_perturb = history[-1]["geometry"]
                                negative_freq_vecs = history[-1][
                                    "frequency_mode_vectors"][0]
                        else:
                            raise Exception(
                                "ERROR: Can't deal with multiple neg frequencies yet! Exiting..."
                            )
                    else:
                        raise AssertionError(
                            "ERROR: Parent cannot have more than two childen! Exiting..."
                        )

            # Implicitly, if the number of negative frequencies decreased from
            # parent to child, continue normally.
            if standard:
                history[-1]["children"].append(len(history))

            structure_successfully_perturbed = False
            for molecule_perturb_scale in np.arange(
                    max_molecule_perturb_scale,
                    min_molecule_perturb_scale,
                    -perturb_scale_grid,
            ):
                new_coords = perturb_coordinates(
                    old_coords=geom_to_perturb,
                    negative_freq_vecs=negative_freq_vecs,
                    molecule_perturb_scale=molecule_perturb_scale,
                    reversed_direction=reversed_direction,
                )
                new_molecule = Molecule(
                    species=orig_species,
                    coords=new_coords,
                    charge=orig_charge,
                    spin_multiplicity=orig_multiplicity,
                )
                if not check_connectivity:
                    # Connectivity checking is disabled: accept the first
                    # (largest) perturbation instead of always failing below.
                    structure_successfully_perturbed = True
                    break
                if check_for_structure_changes(ref_mol,
                                               new_molecule) == "no_change":
                    structure_successfully_perturbed = True
                    break
            if not structure_successfully_perturbed:
                raise Exception(
                    "ERROR: Unable to perturb coordinates to remove negative frequency without changing "
                    "the connectivity! Exiting...")

            new_opt_QCInput = QCInput(
                molecule=new_molecule,
                rem=orig_opt_rem,
                opt=orig_opt_input.opt,
                pcm=orig_opt_input.pcm,
                solvent=orig_opt_input.solvent,
                smx=orig_opt_input.smx,
            )
            new_opt_QCInput.write_file(input_file)
def _get_SiteEnvironments(struct: PymatgenStructure,
                          cutoff: float,
                          PBC: List[bool],
                          get_permutations: bool = True,
                          eigen_tol: float = 1e-5) -> List[Dict[str, Any]]:
    """
    Used to extract information about both primitive cells and data points.
    Extract the local environment of every site whose "SiteTypes" site
    property equals 'A1', using the neighbors returned by
    ``struct.get_all_neighbors(cutoff, include_index=True)``. When
    ``get_permutations`` is True, one neighbor permutation is computed for
    each operation of the environment's point group.

    Parameters
    ----------
    struct: PymatgenStructure
        Pymatgen Structure object used for calculating neighbors. It must
        carry a ``site_properties`` entry named "SiteTypes" with one value
        per site.
    cutoff : float
        Cutoff distance for collecting the local environment, passed to
        ``get_all_neighbors``.
    PBC : List[bool]
        Periodic boundary condition flag per lattice axis. Along every axis
        flagged False, a neighbor is kept only if its fractional coordinate
        (rounded to 10 decimals) lies in [0, 1).
    get_permutations : bool (default True)
        Whether to find permuted neighbor list or not.
    eigen_tol : float (default 1e-5)
        Tolerance for eigenanalysis of point group analysis in pymatgen.

    Returns
    -------
    site_envs : List[Dict[str, Any]]
        One dictionary per selected site with the keys:
        'pos' (coordinates of the site and its kept neighbors, shifted so
        their mean is the origin), 'sitetypes' (site type of each entry),
        'env2config' (index in ``struct`` of each entry), 'permutations'
        (list of index permutations, empty when ``get_permutations`` is
        False) and 'dist' (distance of each entry from the central site,
        0.0 for the site itself).

    Raises
    ------
    ImportError
        If pymatgen cannot be imported.
    """
    try:
        from pymatgen.core import Molecule
        from pymatgen.symmetry.analyzer import PointGroupAnalyzer
    except ImportError as err:
        # Only a failed import is translated; anything else propagates as-is.
        raise ImportError(
            "This class requires pymatgen to be installed.") from err

    pbc = np.array(PBC)
    structure = struct
    neighbors = structure.get_all_neighbors(cutoff, include_index=True)
    symbols = structure.species
    site_types = structure.site_properties['SiteTypes']
    site_idxs = [
        i for i, sitetype in enumerate(site_types) if sitetype == 'A1'
    ]
    # Species -> site type lookup; a later site overrides an earlier one
    # that shares the same species.
    sym_site_map = dict(zip(symbols, site_types))

    site_envs = []
    for site_idx in site_idxs:
        # The central site always comes first in every per-environment list.
        local_env_sym = [symbols[site_idx]]
        local_env_xyz = [structure[site_idx].coords]
        local_env_dist = [0.0]
        local_env_sitemap = [site_idx]
        for n in neighbors[site_idx]:
            # if PBC condition is fulfilled..
            c = np.around(n[0].frac_coords, 10)
            withinPBC = np.logical_and(0 <= c, c < 1)
            if np.all(withinPBC[~pbc]):
                local_env_xyz.append(n[0].coords)
                local_env_sym.append(n[0].specie)
                local_env_dist.append(n[1])
                local_env_sitemap.append(n[2])
        # Center the environment on the mean of its coordinates.
        local_env_xyz = np.subtract(local_env_xyz, np.mean(local_env_xyz, 0))

        perm = []
        if get_permutations:
            finder = PointGroupAnalyzer(Molecule(local_env_sym, local_env_xyz),
                                        eigen_tolerance=eigen_tol)
            for op in finder.get_pointgroup():
                newpos = op.operate_multi(local_env_xyz)
                # For each original position, the index of the closest
                # position after applying the symmetry operation.
                perm.append(
                    np.argmin(cdist(local_env_xyz, newpos), axis=1).tolist())

        site_envs.append({
            'pos': local_env_xyz,
            'sitetypes': [sym_site_map[s] for s in local_env_sym],
            'env2config': local_env_sitemap,
            'permutations': perm,
            'dist': local_env_dist
        })
    return site_envs
def parse_symmetry(pos):
    """
    Return the point-group symbol of a set of positions.

    Every position is assigned the placeholder species "C", so only the
    geometry of ``pos`` enters the analysis. The positions are wrapped in a
    Molecule, analysed with a default-configured PointGroupAnalyzer, and the
    analyzer's ``sch_symbol`` attribute is returned.

    Args:
        pos: Sequence of coordinates, one entry per point.

    Returns:
        The ``sch_symbol`` of the PointGroupAnalyzer built from ``pos``
        (presumably pymatgen's Schoenflies symbol - confirm against the
        file's imports).
    """
    placeholder_species = ["C" for _ in range(len(pos))]
    analyzer = PointGroupAnalyzer(Molecule(placeholder_species, pos))
    return analyzer.sch_symbol
def extract_molecule(self, indices: List[int]) -> Molecule:
    """
    Build a Molecule from the sites selected by ``indices``.

    The work is delegated to three methods of this object, called in order:
    ``get_molecular_structure_from_indices`` (sub-structure for the indices),
    ``get_structure_graph`` (graph of that sub-structure) and
    ``walk_graph_and_get_coords`` (coordinates derived from the graph).

    Args:
        indices: Site indices identifying the molecule.

    Returns:
        Molecule with the species of the extracted sub-structure and the
        coordinates obtained by walking its graph.
    """
    sub_structure = self.get_molecular_structure_from_indices(indices)
    walked_coords = self.walk_graph_and_get_coords(
        self.get_structure_graph(sub_structure))
    return Molecule(species=sub_structure.species, coords=walked_coords)
from pymatgen.io.vaspio.vasp_input import Incar, Poscar, Potcar, Kpoints
from pymatgen.core import Structure, Molecule

from mpinterfaces.interface import Interface, Ligand

# Set up the inputs for a ligand-on-slab interface: load the starting
# structures from POSCAR files, then define the slab and adsorption settings.
strt= Structure.from_file("POSCAR_PbS_bulk_with_vdw") # POSCAR of vdW-relaxed bulk PbS
mol_struct= Structure.from_file("POSCAR_DMF") # POSCAR holding the DMF molecule (per the file name)
# Rebuild the DMF structure as a non-periodic Molecule from its species and
# Cartesian coordinates.
mol= Molecule(mol_struct.species, mol_struct.cart_coords)
DMF= Ligand([mol]) # create the DMF Ligand from that single molecule
supercell = [1,1,1]

# Slab thickness and vacuum are set manually for now to converged values;
# surface coverage is fixed at 0.014 ligand/sq.Angstrom for consistency,
# the best ligand spacing at that coverage.
# NOTE(review): min_thick and min_vac are presumably in Angstrom - confirm.
min_thick= 19
min_vac= 12
surface_coverage= 0.014

# hkl (Miller indices) of the facet to reproduce
hkl= [1,0,0]
# species on the slab to adsorb over
slab_species= 'Pb'
# species on the ligand serving as the bridge atom
adatom_on_ligand= 'O'
# initial adsorption distance in Angstrom
ads_distance = 3.0
def from_string(cls, string_input):
    """
    Read an FiestaInput from a string. Currently tested to work with
    files generated from this class itself.

    The text is consumed strictly in order. Most sections are a
    header/comment line followed by one data line of whitespace-separated
    fields; the species list and the geometry are a header line followed by
    several data lines. All option values are stored as the raw strings
    found in the input.

    Args:
        string_input: string_input to parse.

    Returns:
        FiestaInput object
    """
    # Reverse once so each line can be taken from the end of the list.
    pending = string_input.strip().split("\n")
    pending.reverse()

    def read_record():
        # Drop the section header line, then tokenize the data line after it.
        pending.pop()
        return pending.pop().strip().split()

    # number of atoms and species
    fields = read_record()
    nat, nsp = fields[0], fields[1]

    # number of valence bands (read to validate the line; not used further)
    fields = read_record()
    nvbands = fields[0]

    # correlation_grid
    # number of points and spacing in eV for correlation grid
    fields = read_record()
    correlation_grid = {'n_grid': fields[0], 'dE_grid': fields[1]}

    # Exc DFT
    # re-read=1 or recompute=0 Exc DFT
    fields = read_record()
    Exc_DFT_option = {'rdVxcpsi': fields[0]}

    # COHSEX
    # number of COHSEX corrected occp and unoccp bands: C=COHSEX  H=HF
    fields = read_record()
    COHSEX_options = {
        'nv_cohsex': fields[0],
        'nc_cohsex': fields[1],
        'eigMethod': fields[2],
    }
    # number of COHSEX iter, scf on wfns, mixing coeff; V=RI-V  I=RI-D
    fields = read_record()
    COHSEX_options['nit_cohsex'] = fields[0]
    COHSEX_options['resMethod'] = fields[1]
    COHSEX_options['scf_cohsex_wf'] = fields[2]
    COHSEX_options['mix_cohsex'] = fields[3]

    # GW
    # number of GW corrected occp and unoccp bands
    fields = read_record()
    GW_options = {'nv_corr': fields[0], 'nc_corr': fields[1]}
    # number of GW iterations
    fields = read_record()
    GW_options['nit_gw'] = fields[0]

    # BSE
    # dumping for BSE and TDDFT
    fields = read_record()
    BSE_TDDFT_options = {'do_bse': fields[0], 'do_tddft': fields[1]}
    # number of occp. and virtual bands fo BSE: nocore and up to 40 eVs
    fields = read_record()
    BSE_TDDFT_options['nv_bse'] = fields[0]
    BSE_TDDFT_options['nc_bse'] = fields[1]
    # number of excitations needed and number of iterations
    fields = read_record()
    BSE_TDDFT_options['npsi_bse'] = fields[0]
    BSE_TDDFT_options['nit_bse'] = fields[1]

    # Molecule
    # list of symbols in order: one header line, then one line per species
    pending.pop()
    atname = []
    remaining = int(nsp)
    while remaining != 0:
        atname.append(pending.pop().strip().split()[0])
        remaining -= 1

    # scaling factor
    # NOTE(review): the value is parsed but never applied to the coordinates
    # below, although the input format says they are multiplied by it -
    # confirm whether that is intended.
    fields = read_record()
    scale = fields[0]

    # atoms x,y,z cartesian: one header line, then one line per atom holding
    # three coordinates and a 1-based index into the species list
    pending.pop()
    species = []
    coords = []
    remaining = int(nat)
    while remaining != 0:
        fields = pending.pop().strip().split()
        coords.append([float(value) for value in fields[0:3]])
        species.append(atname[int(fields[3]) - 1])
        remaining -= 1

    return FiestaInput(mol=Molecule(species, coords),
                       correlation_grid=correlation_grid,
                       Exc_DFT_option=Exc_DFT_option,
                       COHSEX_options=COHSEX_options,
                       GW_options=GW_options,
                       BSE_TDDFT_options=BSE_TDDFT_options)