def _write_torsion_vmd(coords, atomnos, constrained_indexes, grouped_torsions):
    '''
    Write a test geometry and a VMD script that highlights its torsions.

    Two files are created in the current working directory:
    "torsion_test.xyz", holding the structure, and "torsion_test.vmd",
    a script that loads that structure, draws the inner atoms of every
    torsion (all indexes but the first and the last) as CPK spheres,
    one color per torsion group, and adds a bond label for every
    constrained pair.

    :params coords: coordinates, passed on to write_xyz.
    :params atomnos: atomic numbers, passed on to write_xyz.
    :params constrained_indexes: iterable of (a, b) index pairs to be labeled.
    :params grouped_torsions: iterable of groups of objects exposing a
                              .torsion sequence of atomic indexes. Only the
                              first six groups are drawn, since six colors
                              are defined.
    '''
    import os

    from tscode.utils import write_xyz

    cwd = os.getcwd()
    xyz_path = os.path.join(cwd, 'torsion_test.xyz')
    vmd_path = os.path.join(cwd, 'torsion_test.vmd')

    with open(xyz_path, 'w') as f:
        write_xyz(coords, atomnos, f)

    # The structure path is assembled by os.path.join instead of gluing a
    # backslash to the folder name, so the script is valid on every platform.
    lines = [
        'display resetview\n',
        'mol new {%s}\n' % xyz_path,
        'mol representation Lines 2\n',
        'mol color ColorID 16\n',
    ]

    # zip() stops at the shortest input: groups beyond the sixth are skipped
    for group, color in zip(grouped_torsions, (7, 9, 10, 11, 29, 16)):
        for torsion in group:
            selection = ' '.join(str(i) for i in torsion.torsion[1:-1])
            lines.extend((
                'mol selection index %s\n' % selection,
                'mol representation CPK 0.7 0.5 50 50\n',
                f'mol color ColorID {color}\n',
                'mol material Transparent\n',
                'mol addrep top\n',
            ))

    for a, b in constrained_indexes:
        lines.append(f'label add Bonds 0/{a} 0/{b}\n')

    with open(vmd_path, 'w') as f:
        f.write(''.join(lines))
def confab_operator(filename, options, logfunction=None):
    '''
    Run a Confab conformational search (through the obabel executable)
    on the single structure contained in an .xyz file.

    :params filename: name of the input .xyz file. The output name is
                      obtained by replacing its last four characters
                      with "_confab.xyz".
    :params options: object exposing a boolean .let attribute. Unless it
                     is set, only the first 10 conformers are kept.
    :params logfunction: optional callable used to log messages.
    :return: name of the .xyz file holding the conformers.
    :raises InputError: if the file holds more than one structure, if the
                        structure is made of more than one connected
                        component, or if it contains elements outside of
                        the supported set.
    '''
    if logfunction is not None:
        logfunction(f'--> Performing conformational search and optimization on {filename}')

    data = read_xyz(filename)

    if len(data.atomcoords) > 1:
        raise InputError(
            f'Requested conformational search on file {filename} that already contains more than one structure.'
        )

    if len(tuple(connected_components(graphize(data.atomcoords[0], data.atomnos)))) > 1:
        raise InputError((
            f'Requested conformational search on a molecular complex (file {filename}). '
            'Confab is not suited for this task, and using TSCoDe\'s csearch> operator '
            'is a better idea.'))

    if set(data.atomnos) - {1, 6, 7, 8, 9, 15, 16, 17, 35, 53}:
        raise InputError((
            'Requested conformational search on a molecule that contains atoms different '
            'than the ones for which OpenBabel Force Fields are parametrized. Please '
            'perform this conformational search through the more sophisticated and better '
            'integrated csearch> operator, part of the TSCoDe program.'))

    confname = filename[:-4] + '_confab.xyz'

    # Arguments are passed as an explicit list, so that file names
    # containing whitespace reach obabel as a single argument.
    command = ['obabel', filename, '-O', confname,
               '--confab', '--rcutoff', '0.5', '--original']

    with suppress_stdout_stderr():
        check_call(command, stdout=DEVNULL, stderr=STDOUT)
        # running Confab through Openbabel

    data = read_xyz(confname)
    conformers = data.atomcoords

    if len(conformers) > 10 and not options.let:
        conformers = conformers[0:10]
        # logfunction is optional: never call it without checking
        if logfunction is not None:
            logfunction('Will use only the best 10 conformers for TSCoDe embed.\n')

    # opening in 'w' mode truncates the Confab output before rewriting it
    with open(confname, 'w') as f:
        for i, conformer in enumerate(conformers):
            write_xyz(conformer, data.atomnos, f, title=f'Generated conformer {i}')

    return confname
def opt_operator(filename, embedder, logfunction=None):
    '''
    Optimize every structure of an .xyz file, sort the results by
    energy and write the ones within 10 kcal/mol from the most stable.

    :params filename: name of the input .xyz file. The output name is
                      obtained by replacing its last four characters
                      with "_opt.xyz".
    :params embedder: embedder object, used to pick the calculator and
                      for its .options.calculator / .options.theory_level.
    :params logfunction: optional callable used to log messages.
    :return: name of the .xyz file holding the optimized structures.
    '''
    if logfunction is not None:
        logfunction(
            f'--> Performing {embedder.options.calculator} {embedder.options.theory_level} optimization on {filename}'
        )

    t_start = time.perf_counter()

    data = read_xyz(filename)

    lowest_calc = _get_lowest_calc(embedder)
    conformers, energies = _refine_structures(data.atomcoords,
                                              data.atomnos,
                                              *lowest_calc,
                                              loadstring='Optimizing conformer')

    energies, conformers = zip(*sorted(zip(energies, conformers), key=lambda x: x[0]))
    energies = np.array(energies) - np.min(energies)
    conformers = np.array(conformers)
    # sorting structures based on energy

    mask = energies < 10
    # getting the structures to reject (Rel Energy > 10 kcal/mol)

    n_total = len(conformers)
    n_discarded = n_total - np.count_nonzero(mask)

    conformers, energies = conformers[mask], energies[mask]
    # applying the mask that rejects high energy confs

    optname = filename[:-4] + '_opt.xyz'
    with open(optname, 'w') as f:
        for i, conformer in enumerate(conformers):
            write_xyz(conformer,
                      data.atomnos,
                      f,
                      title=f'Optimized conformer {i} - Rel. E. = {round(energies[i], 3)} kcal/mol')

    # The summary is only built and sent if a logger was provided:
    # calling the default None would otherwise crash the run at its end.
    if logfunction is not None:
        plural = 's' if n_total > 1 else ''
        s = f'Completed optimization on {n_total} conformer{plural}. ({time_to_string(time.perf_counter()-t_start)}).\n'

        # reported whenever the mask rejected something, so that the
        # message always agrees with what was actually written
        if n_discarded:
            s += f'Discarded {n_discarded}/{n_total} unstable conformers (Rel. E. > 10 kcal/mol)\n'

        logfunction(s + '\n')

    return optname
def openbabel_opt(structure, atomnos, constrained_indexes, graphs=None, check=False, method='UFF', **kwargs):
    '''
    Force field optimization through the OpenBabel bindings, keeping
    the distance of each constrained pair at its starting value.

    :params structure: coordinates of the structure to be optimized.
    :params atomnos: atomic numbers of the structure.
    :params constrained_indexes: iterable of (a, b) pairs of zero-based
                                 atomic indexes to be kept at distance.
    :params graphs: molecular graphs, required if check is True.
    :params check: if True, the optimized geometry is passed to
                   scramble_check to determine the success flag.
    :params method: name of the OpenBabel force field.
    :return: optimized coordinates, force field energy, success flag.
    '''
    assert not (check and graphs is None), 'Either provide molecular graphs or do not check for scrambling.'

    in_name = 'temp_ob_in.xyz'
    out_name = 'temp_ob_out.xyz'

    with open(in_name, 'w') as xyz_file:
        write_xyz(structure, atomnos, xyz_file)

    # Standard openbabel molecule load
    converter = ob.OBConversion()
    converter.SetInAndOutFormats('xyz', 'xyz')
    molecule = ob.OBMol()
    converter.ReadFile(molecule, in_name)

    # Each constrained pair is frozen at its current distance (Angstroms).
    # OpenBabel atom ids start from one, hence the +1 shift.
    ff_constraints = ob.OBFFConstraints()
    for a, b in constrained_indexes:
        id_a, id_b = int(a+1), int(b+1)
        current_distance = molecule.GetAtom(id_a).GetDistance(id_b)
        ff_constraints.AddDistanceConstraint(id_a, id_b, current_distance)

    # Setup the force field with the constraints
    ff = ob.OBForceField.FindForceField(method)
    ff.Setup(molecule, ff_constraints)
    ff.SetConstraints(ff_constraints)

    # Conjugate gradient minimization, 500 steps at most,
    # then the coordinates are saved back to the molecule
    ff.ConjugateGradients(500)
    ff.GetCoordinates(molecule)

    energy = ff.Energy()

    # Round trip through a file to get the coordinates back
    converter.WriteFile(molecule, out_name)
    converter.CloseOutFile()

    opt_coords = read_xyz(out_name).atomcoords[0]

    success = scramble_check(opt_coords, atomnos, constrained_indexes, graphs) if check else True

    return opt_coords, energy, success
def neb_operator(filename, embedder):
    '''
    Run a NEB transition state search between the two geometries
    contained in an .xyz file, log the energy of the TS guess relative
    to both end points and write the TS guess to "<title>_TS.xyz",
    where <title> is the input name with its last four characters
    replaced by "_NEB".

    :params filename: name of an .xyz file containing exactly two
                      geometries (start and end point).
    :params embedder: embedder object, used for logging, for its
                      options and passed on to the ASE routines.
    '''
    embedder.t_start_run = time.perf_counter()
    data = read_xyz(filename)
    assert len(data.atomcoords) == 2, 'NEB calculations need a .xyz input file with two geometries.'

    from tscode.ase_manipulations import ase_neb, ase_popt

    def _signed(value):
        # Explicit '+' for non-negative numbers; negative ones already
        # carry their own '-' and must not get a second one.
        return f'{"+" if value >= 0 else ""}{round(value, 3)}'

    reagents, products = data.atomcoords
    title = filename[:-4] + '_NEB'

    embedder.log(
        f'--> Performing a NEB TS optimization. Using start and end points from {filename}\n'
        f'Theory level is {embedder.options.theory_level} via {embedder.options.calculator}'
    )

    # steps=0: single point energies of the two end points
    print('Getting start point energy...', end='\r')
    _, reag_energy, _ = ase_popt(embedder, reagents, data.atomnos, steps=0)

    print('Getting end point energy...', end='\r')
    _, prod_energy, _ = ase_popt(embedder, products, data.atomnos, steps=0)

    ts_coords, ts_energy, _ = ase_neb(embedder,
                                      reagents,
                                      products,
                                      data.atomnos,
                                      title=title,
                                      logfunction=embedder.log,
                                      write_plot=True,
                                      verbose_print=True)

    e1 = ts_energy - reag_energy
    e2 = ts_energy - prod_energy

    embedder.log(
        f'NEB completed, relative energy from start/end points (not barrier heights):\n'
        f' > E(TS)-E(start): {_signed(e1)} kcal/mol\n'
        f' > E(TS)-E(end) : {_signed(e2)} kcal/mol')

    if not (e1 > 0 and e2 > 0):
        embedder.log(
            f'\nNEB failed, TS energy is lower than both the start and end points.\n'
        )

    # The output name follows the input file instead of being hard-coded,
    # so that different runs do not overwrite each other.
    with open(f'{title}_TS.xyz', 'w') as f:
        write_xyz(ts_coords,
                  data.atomnos,
                  f,
                  title='NEB TS - see log for relative energies')
def test_anchors(self):
    '''
    Debug helper: for each molecule of the embedder, write an .xyz file
    ("anchor_test_<n>.xyz") where every conformer is followed by its
    anchor centers, rendered as pseudo-atoms with atomic number 3, 4
    or 6 depending on the anchor label (0, 1 or 2). A matching VMD
    script is then requested through self.write_anchor_vmd.
    '''
    label_to_atomno = {0: 3, 1: 4, 2: 6}

    for mol_id, molecule in enumerate(self.embedder.objects):
        xyz_name = f'anchor_test_{mol_id}.xyz'

        with open(xyz_name, 'w') as xyz_file:
            for conf_id, conf_coords in enumerate(molecule.atomcoords):
                centers, _, labels = docker.get_anchors(molecule, conf=conf_id)

                all_coords = np.concatenate((conf_coords, centers))
                pseudo_atoms = [label_to_atomno[label] for label in labels]
                all_atomnos = np.concatenate((molecule.atomnos, pseudo_atoms))

                write_xyz(all_coords, all_atomnos, xyz_file)

        # labels are the ones of the last conformer processed
        self.write_anchor_vmd(molecule.atomnos, labels, xyz_name=xyz_name)
def write_structures(self, tag, indexes=None, energies=True, relative=True, extra='', p=True):
    '''
    Write the current structures, aligned, to "TSCoDe_<tag>_<stamp>.xyz".
    The name of the file is stored in self.outname.

    :params tag: string used in the file name and in structure titles.
    :params indexes: indexes passed to align_structures. Defaults to
                     the first element of self.constrained_indexes.
    :params energies: if True, the energy of each structure is added
                      to its title.
    :params relative: if True, energies are printed relative to the
                      lowest one. self.energies is not modified.
    :params extra: string appended to every title.
    :params p: if True, a summary line is sent to self.log.
    '''
    if indexes is None:
        indexes = self.constrained_indexes[0]

    if energies:
        rel_e = np.asarray(self.energies)

        if relative:
            # A new array is created on purpose: an in-place subtraction
            # would alias self.energies and silently shift the energies
            # stored on the embedder every time an output is written.
            rel_e = rel_e - np.min(rel_e)

    self.outname = f'TSCoDe_{tag}_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:
        for i, structure in enumerate(align_structures(self.structures, indexes)):
            title = f'TS candidate {i+1} - {tag}'

            if energies:
                title += f' - Rel. E. = {round(rel_e[i], 3)} kcal/mol '

            title += extra

            write_xyz(structure, self.atomnos, f, title=title)

    if p:
        self.log(
            f'Wrote {len(self.structures)} {tag} TS structures to {self.outname} file.\n'
        )
# docker.test_anchors()

import cProfile
from pstats import Stats


def profile_run(name):
    '''
    Profile a docker.dock_structures() call with cProfile. Raw data is
    saved to "TSCoDe_<name>_cProfile.dat", and two text reports are
    written from it: one sorted by internal time, one by cumulative time.
    '''
    datafile = f"TSCoDe_{name}_cProfile.dat"
    cProfile.run("docker.dock_structures()", datafile)

    # one report per sorting key, each one read anew from the data file
    for sort_key in ("time", "cumtime"):
        with open(f"TSCoDe_{name}_cProfile_output_{sort_key}.txt", "w") as report:
            Stats(datafile, stream=report).sort_stats(sort_key).print_stats()


# start = t()
# docker.dock_structures()
# print('Took %.3f s' % (t()-start))

profile_run('dock')

print(f'Found {len(docker.structures)} structs')

# every structure is written with four extra atoms of atomic number 3
with open('dock_test.xyz', 'w') as f:
    an = np.concatenate((docker.atomnos, [3] * 4))
    for c, coords in enumerate(docker.structures):
        write_xyz(coords, an, f)
def xtb_metadyn_augmentation(coords, atomnos, constrained_indexes=None, new_structures:int=5, title=0, debug=False):
    '''
    Runs a metadynamics simulation (MTD) through
    the XTB program to obtain new conformations.
    The GFN-FF force field is used.

    :params coords: array of cartesian coordinates for the starting structure.
    :params atomnos: array of atomic numbers.
    :params constrained_indexes: optional iterable of (a, b) pairs of atomic
                                 indexes, whose distance is constrained to
                                 the value it has in coords.
    :params new_structures: value written for both the "time" and "save"
                            keywords of the XTB input. Files "scoord.1" to
                            "scoord.<new_structures-1>" are read back.
    :params title: used to name the log ("Structure<title>_MTD.log") and
                   the trajectory ("Structure<title>_MTD_traj.xyz").
    :params debug: currently unused - the trajectory is always kept.
    :return: array of structures, the first one being the input coords.
    '''
    with open('temp.xyz', 'w') as f:
        write_xyz(coords, atomnos, f, title='temp')

    parts = [
        '$md\n'
        ' time=%s\n' % (new_structures),
        ' step=1\n'
        ' temp=300\n'
        '$end\n'
        '$metadyn\n'
        ' save=%s\n' % (new_structures),
        '$end',
    ]

    if constrained_indexes is not None:
        parts.append('\n$constrain\n')
        for a, b in constrained_indexes:
            # XTB atom indexes start from one
            parts.append(' distance: %s, %s, %s\n' % (
                a + 1, b + 1, round(norm_of(coords[a] - coords[b]), 5)))

    with open('temp.inp', 'w') as f:
        f.write(''.join(parts))

    # No shell is involved in this call, so a "> file 2>&1" suffix would be
    # handed to XTB as extra arguments instead of redirecting its output:
    # the log file is opened here and used as stdout/stderr of the process.
    command = ['xtb', '--md', '--input', 'temp.inp', 'temp.xyz', '--gfnff']

    try:
        with open(f'Structure{title}_MTD.log', 'w') as log:
            check_call(command, stdout=log, stderr=STDOUT)

    except KeyboardInterrupt:
        print('KeyboardInterrupt requested by user. Quitting.')
        quit()

    structures = [coords]
    for n in range(1, new_structures):
        name = 'scoord.' + str(n)
        structures.append(parse_xtb_out(name))
        os.remove(name)

    for filename in ('gfnff_topo', 'xtbmdoc', 'mdrestart'):
        try:
            os.remove(filename)
        except FileNotFoundError:
            pass

    # if debug:
    os.rename('xtb.trj', f'Structure{title}_MTD_traj.xyz')
    # else:
    #     os.remove('xtb.traj')

    structures = np.array(structures)

    return structures
def xtb_opt(coords, atomnos, constrained_indexes=None, method='GFN2-xTB', solvent=None, title='temp', read_output=True, **kwargs):
    '''
    This function writes an XTB .inp file, runs it with the subprocess
    module and reads its output.

    :params coords: array of shape (n,3) with cartesian coordinates for atoms.
    :params atomnos: array of atomic numbers for atoms.
    :params constrained_indexes: array of shape (n,2), with the indexes
                                 of atomic pairs to be constrained.
    :params method: string, specifiyng the theory level to be used
                    (GFN-xTB, GFN2-xTB or GFN-FF, case-insensitive).
    :params solvent: optional solvent name. If None, no solvation flag
                     is used, unless the method is GFN-FF.
    :params title: string, used as a file name and job title for the XTB input file.
    :params read_output: Whether to read the output file and return anything.
    :return: if read_output is True, the optimized coordinates, the energy
             and True - or (None, None, False) if an output file is
             missing. Nothing is returned if read_output is False.
    '''
    with open(f'{title}.xyz', 'w') as f:
        write_xyz(coords, atomnos, f, title=title)

    method_key = method.upper()
    is_force_field = method_key in ('GFN-FF', 'GFNFF')

    s = f'$opt\n logfile={title}_opt.log\n$end'

    if constrained_indexes is not None:
        s += '\n$constrain\n'
        for a, b in constrained_indexes:
            # XTB atom indexes start from one
            s += ' distance: %s, %s, %s\n' % (
                a + 1, b + 1, round(norm_of(coords[a] - coords[b]), 5))

    if method_key in ('GFN-XTB', 'GFNXTB'):
        s += '\n$gfn\n method=1\n'

    elif method_key in ('GFN2-XTB', 'GFN2XTB'):
        s += '\n$gfn\n method=2\n'

    s += '\n$end'

    with open(f'{title}.inp', 'w') as f:
        f.write(s)

    flags = ['--opt']

    # The force field is recognized regardless of letter case, like the
    # other methods: otherwise a lowercase name would get the GFN-FF
    # solvent default below without ever enabling GFN-FF itself.
    if is_force_field:
        flags.append('tight')
        # tighter convergence for GFN-FF works better

        flags.append('--gfnff')
        # declaring the use of FF instead of semiempirical

    if solvent is not None:

        if solvent == 'methanol':
            flags += ['--gbsa', 'methanol']

        else:
            flags += ['--alpb', solvent]

    elif is_force_field:
        flags += ['--alpb', 'thf']

    # No shell is involved in this call, so a "> temp.log 2>&1" suffix would
    # be handed to XTB as extra arguments instead of redirecting its output:
    # the log file is opened here and used as stdout/stderr of the process.
    command = ['xtb', '--input', f'{title}.inp', f'{title}.xyz', *flags]

    try:
        with open('temp.log', 'w') as log:
            check_call(command, stdout=log, stderr=STDOUT)

    except KeyboardInterrupt:
        print('KeyboardInterrupt requested by user. Quitting.')
        quit()

    if read_output:

        try:
            outname = 'xtbopt.xyz'
            opt_coords = read_xyz(outname).atomcoords[0]
            energy = read_xtb_energy(outname)

            clean_directory()
            os.remove(outname)

            for filename in ('gfnff_topo', 'charges', 'wbo', 'xtbrestart', 'xtbtopo.mol', '.xtboptok'):
                try:
                    os.remove(filename)
                except FileNotFoundError:
                    pass

            return opt_coords, energy, True

        except FileNotFoundError:
            return None, None, False
def ase_torsion_TSs(embedder,
                    coords,
                    atomnos,
                    indexes,
                    threshold_kcal=5,
                    title='temp',
                    optimization=True,
                    logfile=None,
                    bernytraj=None,
                    plot=False):
    '''
    Look for torsional transition states around a dihedral angle.

    The dihedral is scanned in 10 degrees steps, both clockwise and
    counterclockwise. Around each energy peak of these preliminary scans
    a finer scan (1 degree steps) is run, and each maximum found there
    is either refined (saddle or NEB optimization, depending on the
    embedder options) or taken as it is.

    :params embedder: embedder object, used for its options (let, saddle,
                      neb), its log function and passed to ASE routines.
    :params coords: starting cartesian coordinates.
    :params atomnos: atomic numbers.
    :params indexes: the four atomic indexes defining the dihedral.
    :params threshold_kcal: minimum height, relative to the scan minimum,
                            for a maximum to be considered a peak.
    :params title: prefix for the names of the files written.
    :params optimization: whether scans are relaxed or rigid.
    :params logfile: optional open file where messages are also written.
    :params bernytraj: optional prefix for saddle optimization trajectories.
    :params plot: if True, energy profiles are saved (.svg and .pickle).
    :return: array of TS structures and list of their energies, in
             matching order.
    '''

    assert len(indexes) == 4

    def _say(text):
        # Echo a message on screen and, if available, on the log file
        print(text)
        if logfile is not None:
            logfile.write(text + '\n')

    # TS results get their own accumulators: scan energies are stored
    # under a different name, so that they can never overwrite these
    # and the two returned sequences always match one to one.
    ts_structures, ts_energies = [], []

    graph = graphize(coords, atomnos)
    i1, i2, i3, i4 = indexes

    if all([len(shortest_path(graph, start, end)) == 2 for start, end in zip(indexes[0:-1], indexes[1:])]):
        # the four atoms are consecutively bonded: cutting the central
        # bond tells which atoms are on the i3 side of the dihedral
        graph.remove_edge(i2, i3)

        for subgraph in connected_components(graph):
            if i3 in subgraph:
                indexes_to_be_moved = subgraph - {i3}
                break

        if i1 in indexes_to_be_moved:
            # if molecule is cyclical, just move the fourth atom and
            # let the rest of the structure relax
            indexes_to_be_moved = [i4]

            _say('The specified dihedral angle is comprised within a cycle. Switching to safe dihedral scan (moving only last index).')

    else:

        if not embedder.options.let:
            raise SystemExit('The specified dihedral angle is made up of non-contiguous atoms. To prevent errors, the\n' +
                             'run has been stopped. Override this behavior with the LET keyword.')

        # if user did not provide four contiguous indexes,
        # and did that on purpose, just move the fourth atom and
        # let the rest of the structure relax
        indexes_to_be_moved = [i4]

        _say('The specified dihedral angle is made up of non-contiguous atoms.\nThis might cause some unexpected results.')

    routine = ((10, 36, '_clockwise'), (-10, 36, '_counterclockwise'))

    for degrees, steps, direction in routine:

        _say('')

        scan_title = 'Preliminary scan' + (' (clockwise)' if direction == '_clockwise' else ' (counterclockwise)')

        structures, scan_energies = ase_scan(embedder,
                                             coords,
                                             atomnos,
                                             indexes=indexes,
                                             degrees=degrees,
                                             steps=steps,
                                             relaxed=optimization,
                                             indexes_to_be_moved=indexes_to_be_moved,
                                             title=scan_title,
                                             logfile=logfile)

        min_e = min(scan_energies)
        rel_energies = [e-min_e for e in scan_energies]

        tag = '_relaxed' if optimization else '_rigid'

        with open(title + tag + direction + '_scan.xyz', 'w') as outfile:
            for n, structure in enumerate(align_structures(np.array(structures), indexes[:-1])):
                write_xyz(structure, atomnos, outfile, title=f'Scan point {n+1}/{len(structures)} - Rel. E = {round(rel_energies[n], 3)} kcal/mol')

        if plot:
            import pickle

            import matplotlib.pyplot as plt

            fig = plt.figure()

            x1 = [dihedral(structure[indexes]) for structure in structures]

            for i, (x_, y_) in enumerate(get_plot_segments(x1, rel_energies, max_step=abs(degrees)+1)):

                plt.plot(x_,
                         y_,
                         '-',
                         color='tab:blue',
                         label=('Preliminary SCAN'+direction) if i == 0 else None,
                         linewidth=3,
                         alpha=0.50)

        peaks_indexes = atropisomer_peaks(scan_energies, min_thr=min_e+threshold_kcal, max_thr=min_e+75)

        if peaks_indexes:

            plural = 's' if len(peaks_indexes) > 1 else ''
            _say(f'Found {len(peaks_indexes)} peak{plural}. Performing accurate scan{plural}.\n')

            for p, peak in enumerate(peaks_indexes):

                # the fine scan starts one point before the peak
                sub_structures, sub_energies = ase_scan(embedder,
                                                        structures[peak-1],
                                                        atomnos,
                                                        indexes=indexes,
                                                        degrees=degrees/10, #1° or -1°
                                                        steps=20,
                                                        relaxed=optimization,
                                                        ad_libitum=True, # goes on until the hill is crossed
                                                        indexes_to_be_moved=indexes_to_be_moved,
                                                        title=f'Accurate scan {p+1}/{len(peaks_indexes)}',
                                                        logfile=logfile)

                if logfile is not None:
                    logfile.write('\n')

                if plot:
                    x2 = [dihedral(structure[indexes]) for structure in sub_structures]
                    y2 = [e-min_e for e in sub_energies]

                    for i, (x_, y_) in enumerate(get_plot_segments(x2, y2, max_step=abs(degrees/10)+1)):

                        plt.plot(x_,
                                 y_,
                                 '-o',
                                 color='tab:red',
                                 label='Accurate SCAN' if (p == 0 and i == 0) else None,
                                 markersize=1,
                                 linewidth=2,
                                 alpha=0.5)

                sub_peaks_indexes = atropisomer_peaks(sub_energies, min_thr=threshold_kcal+min_e, max_thr=min_e+75)

                if sub_peaks_indexes:

                    plural = 's' if len(sub_peaks_indexes) > 1 else ''
                    msg = f'Found {len(sub_peaks_indexes)} sub-peak{plural}.'

                    if embedder.options.saddle or embedder.options.neb:
                        opt_kind = 'saddle' if embedder.options.saddle else 'NEB TS'
                        msg += f'Performing {opt_kind} optimization{plural}.'

                    # the whole message goes to the log file,
                    # not just the plural suffix
                    _say(msg)

                    for sp, sub_peak in enumerate(sub_peaks_indexes):

                        if plot:
                            x = dihedral(sub_structures[sub_peak][indexes])
                            y = sub_energies[sub_peak]-min_e
                            plt.plot(x, y, color='gold', marker='o', label='Maxima' if p == 0 else None, markersize=3)

                        if embedder.options.saddle:

                            loadbar_title = f' > Saddle opt on sub-peak {sp+1}/{len(sub_peaks_indexes)}'
                            print(loadbar_title)

                            optimized_geom, energy, _ = ase_saddle(embedder,
                                                                   sub_structures[sub_peak],
                                                                   atomnos,
                                                                   title=f'Saddle opt - peak {p+1}, sub-peak {sp+1}',
                                                                   logfile=logfile,
                                                                   traj=bernytraj+f'_{p+1}_{sp+1}.traj' if bernytraj is not None else None)

                            if molecule_check(coords, optimized_geom, atomnos):
                                ts_structures.append(optimized_geom)
                                ts_energies.append(energy)

                        elif embedder.options.neb:

                            loadbar_title = f' > NEB TS opt on sub-peak {sp+1}/{len(sub_peaks_indexes)}, {direction[1:]}'
                            drctn = 'clkws' if direction == '_clockwise' else 'ccws'

                            print(loadbar_title)

                            # end points are taken on the two sides of the maximum
                            optimized_geom, energy, success = ase_neb(embedder,
                                                                      sub_structures[sub_peak-2],
                                                                      sub_structures[(sub_peak+1)%len(sub_structures)],
                                                                      atomnos,
                                                                      n_images=5,
                                                                      title=f'{title}_NEB_peak_{p+1}_sub-peak_{sp+1}_{drctn}',
                                                                      logfunction=embedder.log)

                            if success and molecule_check(coords, optimized_geom, atomnos):
                                ts_structures.append(optimized_geom)
                                ts_energies.append(energy)

                        else:
                            # no refinement requested: the scan maximum is the TS guess
                            ts_structures.append(sub_structures[sub_peak])
                            ts_energies.append(sub_energies[sub_peak])

                        print()

                else:
                    _say('No suitable sub-peaks found.\n')

        else:
            _say('No suitable peaks found.\n')

        if plot:
            plt.legend()
            plt.xlabel(f'Dihedral Angle {tuple(indexes)}')
            plt.ylabel('Energy (kcal/mol)')

            # context manager, so that the pickle file is always closed
            with open(f'{title}{direction}_plt.pickle', 'wb') as pickle_file:
                pickle.dump(fig, pickle_file)

            plt.savefig(f'{title}{direction}_plt.svg')

    ts_structures = np.array(ts_structures)

    clean_directory()

    return ts_structures, ts_energies
def ase_dump(filename, images, atomnos):
    '''
    Write the positions of a series of images to a single .xyz file.
    Each frame is titled "<name>_image_<n>", where <name> is filename
    without its last four characters.

    :params filename: name of the file to write.
    :params images: iterable of objects exposing .get_positions().
    :params atomnos: atomic numbers, passed on to write_xyz.
    '''
    stem = filename[:-4]

    with open(filename, 'w') as xyz_file:
        for n, image in enumerate(images):
            write_xyz(image.get_positions(), atomnos, xyz_file, title=f'{stem}_image_{n}')
def hyperneb_refining(self):
    '''
    Performs a climbing-image NEB calculation inferring reagents
    and products for each structure.

    Structures whose NEB run fails are discarded, the remaining ones are
    pruned by similarity, sorted by energy and written to
    "TSCoDe_NEB_TSs_<stamp>.xyz" (name stored in self.outname).
    If no structure survives, no output file is written.
    '''
    self.log(f'--> HyperNEB optimization ({self.options.theory_level} level)')
    t_start = time.perf_counter()

    for i, structure in enumerate(self.structures):

        loadbar(i, len(self.structures), prefix=f'Performing NEB {i+1}/{len(self.structures)} ')

        t_start_opt = time.perf_counter()

        try:
            self.structures[i], self.energies[i], self.exit_status[i] = hyperNEB(self,
                                                                                 structure,
                                                                                 self.atomnos,
                                                                                 self.ids,
                                                                                 self.constrained_indexes[i],
                                                                                 title=f'structure_{i+1}')

            exit_str = 'COMPLETED' if self.exit_status[i] else 'CRASHED'

        except (MopacReadError, ValueError):
            # Both are thrown if a MOPAC file read fails, but the former occurs when an internal (TSCoDe)
            # read fails (getting reagent or product), the latter when an ASE read fails (during NEB)
            exit_str = 'CRASHED'
            self.exit_status[i] = False

        t_end_opt = time.perf_counter()

        self.log(
            f' - {self.options.calculator} {self.options.theory_level} NEB optimization: Structure {i+1} - {exit_str} - ({time_to_string(t_end_opt-t_start_opt)})',
            p=False)

    loadbar(1, 1, prefix=f'Performing NEB {len(self.structures)}/{len(self.structures)} ')
    t_end = time.perf_counter()
    self.log(
        f'{self.options.calculator} {self.options.theory_level} NEB optimization took {time_to_string(t_end-t_start)} ({time_to_string((t_end-t_start)/len(self.structures))} per structure)'
    )
    self.log(
        f'NEB converged for {len([i for i in self.exit_status if i])}/{len(self.structures)} structures\n'
    )

    # constrained_indexes holds one entry per structure (see the loop
    # above), so it is masked together with the other attributes:
    # leaving it out would misalign it when it is reordered below.
    mask = self.exit_status
    self.apply_mask(('structures', 'energies', 'exit_status', 'constrained_indexes'), mask)

    # nothing left to prune, sort or write
    if len(self.structures) == 0:
        return

    ################################################# PRUNING: SIMILARITY (POST NEB)

    t_start = time.perf_counter()
    self.structures, mask = prune_conformers(self.structures,
                                             self.atomnos,
                                             max_rmsd=self.options.pruning_thresh)
    self.energies = self.energies[mask]
    # keeping the other per-structure attributes aligned as well
    self.apply_mask(('exit_status', 'constrained_indexes'), mask)
    t_end = time.perf_counter()

    if False in mask:
        self.log(
            f'Discarded {len([b for b in mask if not b])} candidates for similarity ({len([b for b in mask if b])} left, {time_to_string(t_end-t_start)})'
        )
    self.log()

    ################################################# NEB XYZ OUTPUT

    self.energies -= np.min(self.energies)
    _, sequence = zip(*sorted(zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
    self.energies = scramble(self.energies, sequence)
    self.structures = scramble(self.structures, sequence)
    self.constrained_indexes = scramble(self.constrained_indexes, sequence)
    # sorting structures based on energy

    self.outname = f'TSCoDe_NEB_TSs_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:
        for i, structure in enumerate(align_structures(self.structures, self.constrained_indexes[0])):
            write_xyz(structure,
                      self.atomnos,
                      f,
                      title=f'Structure {i+1} - TS - Rel. E. = {round(self.energies[i], 3)} kcal/mol')

    self.log(
        f'Wrote {len(self.structures)} final TS structures to {self.outname} file\n'
    )
def optimization_refining(self):
    '''
    Refines structures by constrained optimizations with the active calculator,
    discarding similar ones and scrambled ones.

    Steps, in order: optimization of every structure, similarity pruning,
    refinement of bonding distances (skipped when self.embed is 'prune'),
    energy sorting and optional energy pruning (self.options.kcal_thresh),
    output of the structures to "TSCoDe_TS_guesses_<stamp>.xyz"
    (name stored in self.outname).
    '''
    t_start = time.perf_counter()

    self.log(
        f'--> Structure optimization ({self.options.theory_level} level via {self.options.calculator})'
    )

    # MOPAC gets extra keywords appended to the theory level
    if self.options.calculator == 'MOPAC':
        method = f'{self.options.theory_level} GEO-OK CYCLES=500'
    else:
        method = f'{self.options.theory_level}'

    # iterating over a copy, since self.structures is updated in the loop
    for i, structure in enumerate(deepcopy(self.structures)):
        loadbar(
            i,
            len(self.structures),
            prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')
        try:
            t_start_opt = time.perf_counter()
            new_structure, self.energies[i], self.exit_status[
                i] = optimize(
                    structure,
                    self.atomnos,
                    self.options.calculator,
                    method=method,
                    constrained_indexes=self.constrained_indexes[i],
                    mols_graphs=self.graphs,
                    procs=self.options.procs,
                    max_newbonds=self.options.max_newbonds,
                    check=(self.embed != 'prune'))

            # the geometry is only replaced if the exit status is truthy,
            # while the energy is stored in any case
            if self.exit_status[i]:
                self.structures[i] = new_structure

            exit_str = 'CONVERGED' if self.exit_status[i] else 'SCRAMBLED'

        except MopacReadError:
            # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
            # This occurs when one or more of them is not defined, that is when the calculation did not end well.
            # The easiest solution is to reject the structure and go on.
            self.energies[i] = np.inf
            self.exit_status[i] = False
            exit_str = 'FAILED TO READ FILE'

        # NOTE(review): this clause only re-raises, it has no effect
        except Exception as e:
            raise e

        self.log((
            f' - {self.options.calculator} {self.options.theory_level} optimization: Structure {i+1} {exit_str} - '
            f'took {time_to_string(time.perf_counter()-t_start_opt)}'),
                 p=False)

    loadbar(
        1,
        1,
        prefix=
        f'Optimizing structure {len(self.structures)}/{len(self.structures)} '
    )

    self.log(
        f'Successfully optimized {len([b for b in self.exit_status if b])}/{len(self.structures)} structures. Non-optimized ones will not be discarded.'
    )

    self.log((
        f'{self.options.calculator} {self.options.theory_level} optimization took '
        f'{time_to_string(time.perf_counter()-t_start)} (~{time_to_string((time.perf_counter()-t_start)/len(self.structures))} per structure)'
    ))

    ################################################# PRUNING: SIMILARITY (POST SEMIEMPIRICAL OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# REFINING: BONDING DISTANCES

    if self.embed != 'prune':

        # checkpoint: structures are saved before attempting the refinement
        self.write_structures('TS_guesses_unrefined',
                              energies=False,
                              p=False)
        self.log(
            f'--> Checkpoint output - Updated {len(self.structures)} TS structures before distance refinement.\n'
        )

        self.log(
            f'--> Refining bonding distances for TSs ({self.options.theory_level} level)'
        )

        if self.options.ff_opt:
            try:
                os.remove(f'TSCoDe_checkpoint_{self.stamp}.xyz')
                # We don't need the pre-optimized structures anymore
            except FileNotFoundError:
                pass

        self._set_target_distances()
        t_start = time.perf_counter()

        # iterating over a copy, since self.structures is updated in the loop
        for i, structure in enumerate(deepcopy(self.structures)):
            loadbar(
                i,
                len(self.structures),
                prefix=f'Refining structure {i+1}/{len(self.structures)} ')
            try:
                # trajectories are only saved in debug mode
                traj = f'refine_{i}.traj' if self.options.debug else None

                new_structure, new_energy, self.exit_status[
                    i] = ase_adjust_spacings(self,
                                             structure,
                                             self.atomnos,
                                             self.constrained_indexes[i],
                                             title=i,
                                             traj=traj)

                # geometry and energy are only replaced on success
                if self.exit_status[i]:
                    self.structures[i] = new_structure
                    self.energies[i] = new_energy

            except ValueError as e:
                # ase will throw a ValueError if the output lacks a space in the "FINAL POINTS AND DERIVATIVES" table.
                # This occurs when one or more of them is not defined, that is when the calculation did not end well.
                # The easiest solution is to reject the structure and go on.
                # NOTE(review): self.exit_status[i] is not updated here, so it keeps the
                # value set by the optimization step above - confirm this is intended.
                self.log(repr(e))
                self.log(
                    f'Failed to read MOPAC file for Structure {i+1}, skipping distance refinement',
                    p=False)

        loadbar(1,
                1,
                prefix=f'Refining structure {i+1}/{len(self.structures)} ')
        t_end = time.perf_counter()
        self.log(
            f'{self.options.calculator} {self.options.theory_level} refinement took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)'
        )

        # number of structures before the optional removal of unrefined ones
        before = len(self.structures)
        if self.options.only_refined:
            mask = self.exit_status
            self.apply_mask(('structures', 'energies', 'exit_status',
                             'constrained_indexes'), mask)
            s = f'Discarded {len([i for i in mask if not i])} unrefined structures.'
        else:
            s = 'Non-refined ones will not be discarded.'

        self.log(
            f'Successfully refined {len([i for i in self.exit_status if i])}/{before} structures. {s}'
        )

        ################################################# PRUNING: SIMILARITY (POST REFINEMENT)

        self.zero_candidates_check()
        self.similarity_refining()

        ################################################# PRUNING: FITNESS

        self.fitness_refining()

    ################################################# PRUNING: ENERGY

    # energies are made relative to the lowest one, then structures are
    # reordered by increasing energy
    self.energies = self.energies - np.min(self.energies)
    _, sequence = zip(*sorted(
        zip(self.energies, range(len(self.energies))), key=lambda x: x[0]))
    self.energies = self.scramble(self.energies, sequence)
    self.structures = self.scramble(self.structures, sequence)
    self.constrained_indexes = self.scramble(self.constrained_indexes,
                                             sequence)
    # sorting structures based on energy
    # NOTE(review): self.exit_status is not reordered along with the other
    # attributes, but it is used per-structure in the output below - confirm.

    if self.options.kcal_thresh is not None:

        mask = (self.energies -
                np.min(self.energies)) < self.options.kcal_thresh
        # NOTE(review): 'constrained_indexes' is not part of this mask,
        # unlike the only_refined mask above - confirm this is intended.
        self.apply_mask(('structures', 'energies', 'exit_status'), mask)

        if False in mask:
            self.log(
                f'Discarded {len([b for b in mask if not b])} candidates for energy (Threshold set to {self.options.kcal_thresh} kcal/mol)'
            )

    ################################################# XYZ GUESSES OUTPUT

    self.outname = f'TSCoDe_TS_guesses_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:
        for i, structure in enumerate(
                align_structures(self.structures,
                                 self.constrained_indexes[0])):

            kind = 'REFINED - ' if self.exit_status[i] else 'NOT REFINED - '

            write_xyz(
                structure,
                self.atomnos,
                f,
                title=
                f'Structure {i+1} - {kind}Rel. E. = {round(self.energies[i], 3)} kcal/mol'
            )

    try:
        os.remove(f'TSCoDe_TS_guesses_unrefined_{self.stamp}.xyz')
        # since we have the refined structures, we can get rid of the unrefined ones
    except FileNotFoundError:
        pass

    self.log(
        f'Wrote {len(self.structures)} rough TS structures to {self.outname} file.\n'
    )
def scan_operator(filename, embedder):
    '''
    Distance scan between the two reactive indexes of the first molecule
    of the embedder. The target distance is shortened by 0.05 A at each
    step, optimizing the structure every time, until 80% of the sum of
    the covalent radii of the two atoms is reached.

    Writes the energy profile ("<name>_distance_scan_plt.svg"), all the
    scan structures ("<name minus 4 chars>_scan.xyz") and the one picked
    as energy maximum ("<name minus 4 chars>_scan_max.xyz").

    :params filename: not used by this function.
    :params embedder: embedder object, used for its objects, options
                      and log function.
    '''
    embedder.t_start_run = time.perf_counter()
    mol = embedder.objects[0]

    assert len(mol.atomcoords) == 1, 'The scan> operator works on a single .xyz geometry.'
    assert len(mol.reactive_indexes) == 2, (
        'The scan> operator needs two reactive indexes '
        f'({len(mol.reactive_indexes)} were provided)')

    import matplotlib.pyplot as plt
    from tscode.algebra import norm_of
    from tscode.ase_manipulations import ase_popt
    from tscode.pt import pt

    i1, i2 = mol.reactive_indexes
    geometry = mol.atomcoords[0]

    embedder.log(
        f'--> Performing a distance scan approaching on indexes {i1} '
        f'and {i2}.\nTheory level is {embedder.options.theory_level} '
        f'via {embedder.options.calculator}')

    # starting distance between the scan indexes
    target = norm_of(geometry[i1] - geometry[i2])

    # results of the scan, one element per point
    dists, energies, structures = [], [], []

    # step magnitude, in Angstroms
    step = -0.05

    # The number of iterations is capped so that the two atoms are never
    # forced closer than a fraction of the sum of their covalent radii
    s1, s2 = mol.atomnos[[i1, i2]]
    smallest_d = 0.8 * (pt[s1].covalent_radius + pt[s2].covalent_radius)
    max_iterations = round((target - smallest_d) / abs(step))

    for iteration in range(max_iterations):

        # optimizing the structure with a spring constraint
        trajname = f'{mol.title}_scanpoint_{iteration+1}.traj' if embedder.options.debug else None
        geometry, energy, _ = ase_popt(
            embedder,
            geometry,
            mol.atomnos,
            constrained_indexes=np.array([mol.reactive_indexes]),
            targets=(target,),
            title=f'Step {iteration+1}/{max_iterations} - d={round(target, 2)} A -',
            logfunction=embedder.log,
            traj=trajname,
        )

        # energies are relative to the one of the first point
        if iteration == 0:
            e_0 = energy

        energies.append(energy - e_0)
        dists.append(target)
        structures.append(geometry)

        # modify the target distance and reiterate
        target += step

    ### Start the plotting sequence

    plt.figure()
    plt.plot(dists, energies, color='tab:red', label='Scan energy', linewidth=3)

    id_max = get_scan_peak_index(energies)
    e_max = energies[id_max]
    d_opt = dists[id_max]

    plt.plot(d_opt, e_max, color='gold', label='Energy maximum (TS guess)', marker='o', markersize=3)

    title = mol.name + ' distance scan'
    plt.legend()
    plt.title(title)
    plt.xlabel(f'Indexes {i1}-{i2} distance (A)')
    plt.gca().invert_xaxis()
    plt.ylabel('Rel. E. (kcal/mol)')
    plt.savefig(f'{title.replace(" ", "_")}_plt.svg')

    ### Start structure writing

    n_points = len(structures)
    scan_name = f'{mol.name[:-4]}_scan.xyz'
    max_name = f'{mol.name[:-4]}_scan_max.xyz'

    # all scan structures
    with open(scan_name, 'w') as f:
        for n, (point, dist, rel_e) in enumerate(zip(structures, dists, energies)):
            write_xyz(point,
                      mol.atomnos,
                      f,
                      title=f'Scan point {n+1}/{n_points} '
                      f'- d({i1}-{i2}) = {round(dist, 3)} A - Rel. E = {round(rel_e, 3)} kcal/mol')

    # the maximum goes on another file for convenience
    with open(max_name, 'w') as f:
        write_xyz(structures[id_max],
                  mol.atomnos,
                  f,
                  title=f'Scan point {id_max+1}/{n_points} '
                  f'- d({i1}-{i2}) = {round(dists[id_max], 3)} A - Rel. E = {round(e_max, 3)} kcal/mol')

    embedder.log(f'\n--> Written {n_points} structures to {scan_name}')
    embedder.log(f'\n--> Written energy maximum to {max_name}')
def force_field_refining(self):
    '''
    Performs structural optimizations with the embedder force field calculator.
    A structure is replaced by its optimized geometry only when the
    optimization reports a truthy exit status; the others are kept as
    they are. A checkpoint .xyz file is written before the optimization
    and rewritten after the similarity pruning that follows it.
    '''

    ################################################# CHECKPOINT BEFORE FF OPTIMIZATION

    self.outname = f'TSCoDe_checkpoint_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:
        for i, structure in enumerate(align_structures(self.structures, self.constrained_indexes[0])):
            write_xyz(structure, self.atomnos, f,
                      title=f'TS candidate {i+1} - Checkpoint before FF optimization')

    self.log(f'\n--> Checkpoint output - Wrote {len(self.structures)} TS structures to {self.outname} file before FF optimization.\n')

    ################################################# GEOMETRY OPTIMIZATION - FORCE FIELD

    self.log(f'--> Structure optimization ({self.options.ff_level} level via {self.options.ff_calc})')

    t_start = time.perf_counter()

    # iterate over a copy, since self.structures is updated inside the loop
    for i, structure in enumerate(deepcopy(self.structures)):

        loadbar(i, len(self.structures),
                prefix=f'Optimizing structure {i+1}/{len(self.structures)} ')

        # No try/except here: the previous handler only re-raised the
        # same exception, so errors propagate to the caller unchanged.
        new_structure, _, self.exit_status[i] = optimize(
            structure,
            self.atomnos,
            self.options.ff_calc,
            method=self.options.ff_level,
            constrained_indexes=self.constrained_indexes[i],
            mols_graphs=self.graphs,
            check=(self.embed != 'prune'))

        if self.exit_status[i]:
            self.structures[i] = new_structure

    loadbar(1, 1,
            prefix=f'Optimizing structure {len(self.structures)}/{len(self.structures)} ')

    t_end = time.perf_counter()
    self.log(f'Force Field {self.options.ff_level} optimization took {time_to_string(t_end-t_start)} (~{time_to_string((t_end-t_start)/len(self.structures))} per structure)')

    ################################################# EXIT STATUS

    n_refined = sum(1 for b in self.exit_status if b)
    self.log(f'Successfully pre-refined {n_refined}/{len(self.structures)} candidates at {self.options.ff_level} level.')

    ################################################# PRUNING: SIMILARITY (POST FORCE FIELD OPT)

    self.zero_candidates_check()
    self.similarity_refining()

    ################################################# CHECKPOINT BEFORE OPTIMIZATION

    with open(self.outname, 'w') as f:
        for i, structure in enumerate(align_structures(self.structures, self.constrained_indexes[0])):
            exit_str = f'{self.options.ff_level} REFINED' if self.exit_status[i] else 'RAW'
            write_xyz(structure, self.atomnos, f,
                      title=f'TS candidate {i+1} - {exit_str} - Checkpoint before {self.options.calculator} optimization')

    self.log(f'--> Checkpoint output - Updated {len(self.structures)} TS structures to {self.outname} file before {self.options.calculator} optimization.\n')
def saddle_refining(self):
    '''
    Performs a first order saddle optimization for each structure,
    drops the ones with a falsy exit status, prunes similar survivors
    and writes the remaining ones to an .xyz file, sorted by energy.
    '''
    self.log(f'--> Saddle optimization ({self.options.theory_level} level)')
    tic = time.perf_counter()

    for idx, geometry in enumerate(self.structures):

        loadbar(idx, len(self.structures),
                prefix=f'Performing saddle opt {idx+1}/{len(self.structures)} ')

        try:
            outcome = ase_saddle(
                self,
                geometry,
                self.atomnos,
                self.constrained_indexes[idx],
                mols_graphs=self.graphs if self.embed != 'monomolecular' else None,
                title=f'Saddle opt - Structure {idx+1}',
                logfile=self.logfile,
                traj=f'Saddle_opt_{idx+1}.traj',
                maxiterations=200)
            self.structures[idx], self.energies[idx], self.exit_status[idx] = outcome

        except ValueError:
            # Thrown when an ASE read fails (during saddle opt)
            self.exit_status[idx] = False

    loadbar(1, 1,
            prefix=f'Performing saddle opt {len(self.structures)}/{len(self.structures)} ')

    elapsed = time.perf_counter() - tic
    self.log(f'{self.options.calculator} {self.options.theory_level} saddle optimization took {time_to_string(elapsed)} ({time_to_string(elapsed/len(self.structures))} per structure)')

    n_converged = sum(1 for status in self.exit_status if status)
    self.log(f'Saddle opt completed for {n_converged}/{len(self.structures)} structures')

    # NOTE(review): constrained_indexes is not passed to apply_mask here or
    # after pruning, yet it is reordered together with the structures
    # below - confirm it stays aligned with self.structures.
    keep = self.exit_status
    self.apply_mask(('structures', 'energies', 'exit_status'), keep)

    if len(self.structures) == 0:
        # nothing survived: only emit the empty log line
        self.log()
        return

    ################################################# PRUNING: SIMILARITY (POST SADDLE OPT)

    tic = time.perf_counter()
    self.structures, keep = prune_conformers(
        self.structures,
        self.atomnos,
        max_rmsd=self.options.pruning_thresh)
    self.apply_mask(('energies', 'exit_status'), keep)
    elapsed = time.perf_counter() - tic

    if False in keep:
        n_discarded = sum(1 for flag in keep if not flag)
        n_left = sum(1 for flag in keep if flag)
        self.log(f'Discarded {n_discarded} candidates for similarity ({n_left} left, {time_to_string(elapsed)})')
    self.log()

    ################################################# SADDLE OPT EXTRA XYZ OUTPUT

    self.energies -= np.min(self.energies)

    # indexes of the structures in order of increasing energy
    # (sorted is stable, so ties keep their original relative order)
    sequence = tuple(sorted(range(len(self.energies)),
                            key=lambda k: self.energies[k]))

    self.energies = scramble(self.energies, sequence)
    self.structures = scramble(self.structures, sequence)
    self.constrained_indexes = scramble(self.constrained_indexes, sequence)
    # sorting structures based on energy

    self.outname = f'TSCoDe_SADDLE_TSs_{self.stamp}.xyz'
    with open(self.outname, 'w') as f:
        aligned = align_structures(self.structures, self.constrained_indexes[0])
        for idx, geometry in enumerate(aligned):
            write_xyz(geometry, self.atomnos, f,
                      title=f'Structure {idx+1} - TS - Rel. E. = {round(self.energies[idx], 3)} kcal/mol')

    self.log(f'Wrote {len(self.structures)} saddle-optimized structures to {self.outname} file\n')
def csearch_operator(filename, embedder):
    '''
    Runs a conformational search starting from every structure found in
    the given .xyz file and writes the resulting conformers to a new
    file, named after the input one with a '_confs.xyz' ending.

    filename: path of the .xyz file to read
    embedder: object providing .options, .embed and .log

    Returns the name of the file the conformers were written to.
    '''
    embedder.log(f'--> Performing conformational search on {filename}')

    t_start = time.perf_counter()

    data = read_xyz(filename)

    if len(data.atomcoords) > 1:
        embedder.log('Requested conformational search on multimolecular file - will do\n'
                     'an individual search from each conformer (might be time-consuming).')

    calc, method, procs = _get_lowest_calc(embedder)

    batches = []
    for n, start_coords in enumerate(data.atomcoords):

        if embedder.options.optimization:
            # optimize starting structure before running csearch
            seed_coords = optimize(start_coords,
                                   data.atomnos,
                                   calculator=calc,
                                   method=method,
                                   procs=procs)[0]
        else:
            seed_coords = start_coords

        # generate the most diverse conformers starting from the seed geometry
        batches.append(clustered_csearch(seed_coords,
                                         data.atomnos,
                                         title=f'{filename}, conformer {n+1}',
                                         logfunction=embedder.log))

    conformers = np.array(batches)
    batch_size = conformers.shape[1]

    # merging structures from each run in a single array
    n_atoms = data.atomnos.shape[0]
    conformers = conformers.reshape(-1, n_atoms, 3)

    if embedder.embed is not None:
        elapsed = time_to_string(time.perf_counter()-t_start)
        embedder.log(f'\nSelected the most diverse {batch_size} out of {conformers.shape[0]} conformers for {filename} ({elapsed})')
        conformers = most_diverse_conformers(batch_size, conformers, data.atomnos)

    confname = filename[:-4] + '_confs.xyz'
    with open(confname, 'w') as f:
        for n, conformer in enumerate(conformers):
            write_xyz(conformer, data.atomnos, f, title=f'Generated conformer {n}')

    embedder.log('\n')

    return confname
def opt_linear_scan(embedder, coords, atomnos, scan_indexes, constrained_indexes, step_size=0.02, safe=False, title='temp', logfile=None, xyztraj=None):
    '''
    Runs a linear scan along the distance between the two scan indexes,
    first bringing the atoms closer and then further apart, and stops
    each pass when an optimization fails, a distance limit is reached
    or scan_peak_present detects a peak. The geometry at the highest
    energy found is optimized once more (with check=False) and returned.

    embedder: object providing .options (calculator, theory_level, procs,
              max_newbonds, debug) and .graphs
    coords: starting coordinates, indexed by atom
    atomnos: atomic numbers, used to look up covalent radii
    scan_indexes: the two atom indexes defining the scanned distance
    constrained_indexes: array of index pairs passed to the optimizers
                         as constraints; must contain both scan indexes
    step_size: distance increment applied at each scan step
    safe: if True, use ase_popt instead of the raw optimize function
    title: label used in log lines, trajectory titles and debug file names
    logfile: optional open file to write progress lines to
    xyztraj: optional name of an .xyz file to append scan frames to

    Returns (final_geom, final_energy, True).
    Raises AssertionError if a scan index is not in constrained_indexes.
    '''
    # all() is required here: asserting on a non-empty list is always true
    constrained_atoms = constrained_indexes.ravel()
    assert all(i in constrained_atoms for i in scan_indexes), \
        'Both scan indexes must be present in constrained_indexes.'

    i1, i2 = scan_indexes
    far_thr = 2 * sum(pt[atomnos[i]].covalent_radius for i in scan_indexes)
    t_start = time.perf_counter()
    total_iter = 0

    _, energy, _ = optimize(
        coords,
        atomnos,
        embedder.options.calculator,
        embedder.options.theory_level,
        constrained_indexes=constrained_indexes,
        mols_graphs=embedder.graphs,
        procs=embedder.options.procs,
        max_newbonds=embedder.options.max_newbonds,
    )

    base_dist = norm_of(coords[i1] - coords[i2])
    energies, geometries = [energy], [coords]

    for sign in (1, -1):
    # getting closer for sign == 1, further apart for -1

        active_coords = deepcopy(coords)
        direction = active_coords[i1] - active_coords[i2]
        # restart from the input geometry, so the displacement direction
        # must be reset too and not inherited from the previous pass
        dist = base_dist

        if scan_peak_present(energies):
            break

        for iterations in range(75):

            if safe:
                # use ASE optimization function - more reliable, but locks all interatomic dists
                # the scanned pair target follows the pass direction, the others are kept as they are
                targets = [
                    norm_of(active_coords[a] - active_coords[b]) - sign * step_size
                    if (a in scan_indexes and b in scan_indexes)
                    else norm_of(active_coords[a] - active_coords[b])
                    for a, b in constrained_indexes
                ]

                active_coords, energy, success = ase_popt(
                    embedder,
                    active_coords,
                    atomnos,
                    constrained_indexes,
                    targets=targets,
                    safe=True,
                )

            else:
                # use faster raw optimization function, might scramble more often than the ASE one
                # work on a copy: active_coords may be the very object stored
                # in geometries at the previous step, and must not be altered
                active_coords = deepcopy(active_coords)
                active_coords[i2] += sign * norm(direction) * step_size

                active_coords, energy, success = optimize(
                    active_coords,
                    atomnos,
                    embedder.options.calculator,
                    embedder.options.theory_level,
                    constrained_indexes=constrained_indexes,
                    mols_graphs=embedder.graphs,
                    procs=embedder.options.procs,
                    max_newbonds=embedder.options.max_newbonds,
                )

            if not success:
                if logfile is not None and iterations == 0:
                    logfile.write(f' - {title} CRASHED at first step\n')

                if embedder.options.debug:
                    with open(title + '_SCRAMBLED.xyz', 'a') as f:
                        write_xyz(
                            active_coords,
                            atomnos,
                            f,
                            title=title + (
                                f' d({i1}-{i2}) = {round(dist, 3)} A, Rel. E = {round(energy-energies[0], 3)} kcal/mol'))
                break

            direction = active_coords[i1] - active_coords[i2]
            dist = norm_of(direction)

            total_iter += 1
            geometries.append(active_coords)
            energies.append(energy)

            if xyztraj is not None:
                with open(xyztraj, 'a') as f:
                    write_xyz(
                        active_coords,
                        atomnos,
                        f,
                        title=title + (
                            f' d({i1}-{i2}) = {round(dist, 3)} A, Rel. E = {round(energy-energies[0], 3)} kcal/mol'))

            if (dist < 1.2 and sign == 1) or (dist > far_thr and sign == -1) or (scan_peak_present(energies)):
                break

    distances = [norm_of(g[i1] - g[i2]) for g in geometries]
    best_distance = distances[energies.index(max(energies))]

    distances_delta = [abs(d - best_distance) for d in distances]
    closest_index = distances_delta.index(min(distances_delta))
    closest_dist = distances[closest_index]

    # copy, so that neither the stored geometries nor the caller's
    # coords are modified by the in-place adjustment below
    closest_geom = deepcopy(geometries[closest_index])

    direction = closest_geom[i1] - closest_geom[i2]
    closest_geom[i1] += norm(direction) * (best_distance - closest_dist)

    final_geom, final_energy, _ = optimize(
        closest_geom,
        atomnos,
        embedder.options.calculator,
        embedder.options.theory_level,
        constrained_indexes=constrained_indexes,
        mols_graphs=embedder.graphs,
        procs=embedder.options.procs,
        max_newbonds=embedder.options.max_newbonds,
        check=False,
    )

    if embedder.options.debug:

        if xyztraj is not None:
            # append the final geometry, which is the one the title describes
            with open(xyztraj, 'a') as f:
                write_xyz(
                    final_geom,
                    atomnos,
                    f,
                    title=title + (
                        f' FINAL - d({i1}-{i2}) = {round(norm_of(final_geom[i1]-final_geom[i2]), 3)} A,'
                        f' Rel. E = {round(final_energy-energies[0], 3)} kcal/mol'))

        import matplotlib.pyplot as plt

        fig = plt.figure()

        sorted_distances, sorted_energies = zip(
            *sorted(zip(distances, energies), key=lambda x: x[0]))

        plt.plot(sorted_distances,
                 [s - energies[0] for s in sorted_energies],
                 '-o',
                 color='tab:red',
                 label=f'Linear SCAN ({i1}-{i2})',
                 linewidth=3,
                 alpha=0.5)

        plt.plot(
            base_dist,
            0,
            marker='o',
            color='tab:blue',
            label='Starting point (0 kcal/mol)',
            markersize=5,
        )

        plt.plot(best_distance,
                 final_energy - energies[0],
                 marker='o',
                 color='black',
                 label='Interpolated best distance, actual energy',
                 markersize=5)

        plt.legend()
        plt.title(title)
        plt.xlabel(f'Interatomic distance {tuple(scan_indexes)}')
        plt.ylabel('Energy Rel. to starting point (kcal/mol)')
        plt.savefig(f'{title.replace(" ", "_")}_plt.svg')
        plt.close(fig)
        # release the figure: this function may be called many times in a run

    if logfile is not None:
        logfile.write(
            f' - {title} COMPLETED {total_iter} steps ({time_to_string(time.perf_counter()-t_start)})\n')

    return final_geom, final_energy, True