class FBTargetBuilder:
    """Build ForceBalance targets from QM jobs run through a QCFractal server.

    The workflow (see ``master``) optimizes the input geometry, submits
    relaxed bond-stretching and angle-bending grid scans plus a Hessian
    calculation, and writes the collected data into the ``targets/`` folder.
    """

    def __init__(self, mol2_file, client_conf_file, scan_conf_file=None):
        """ Load the molecule, connect to the server and create targets/

        Parameters
        ----------
        mol2_file: str
            Input file passed to forcebalance.molecule.Molecule.
        client_conf_file: str
            Config file passed to ptl.FractalClient.from_file.
        scan_conf_file: str or None
            Scan config file (yaml). If None, default conf will be used.

        Raises
        ------
        OSError
            If a ``targets`` entry already exists in the current directory.
        """
        self.m = Molecule(mol2_file)
        self.qc_mol = self.fb_molecule_to_qc_molecule(self.m)
        # create a client from config file
        self.client = ptl.FractalClient.from_file(client_conf_file)
        # load scan config from scan_conf_file
        self.scan_conf = self.load_scan_conf(scan_conf_file)
        # create output folder; stored as an absolute path so later writes do
        # not depend on the current working directory
        self.out_folder = os.path.realpath('targets')
        if os.path.exists('targets'):
            raise OSError(
                "Folder targets/ already exist. Please delete the prevous one")
        os.mkdir(self.out_folder)

    def load_scan_conf(self, filename=None):
        """ Get the scan configuration from a yaml file

        Parameters
        ----------
        filename: str or None
            The input scan config filename (in yaml format). If None, default conf will be used.

        Returns
        -------
        scan_conf: dict
            Keys are lower-cased. Keys that are not part of the default
            configuration are kept in the dict but reported with a warning.

        Raises
        ------
        ValueError
            If the file does not contain a mapping, or required keys are missing.
        """
        default_scan_conf = {
            'qm_method': 'HF',
            'qm_basis': 'sto-3g',
            'bond_steps': [-0.2, -0.1, 0.0, 0.1, 0.2],
            'angle_steps': [-20, -10, 0, 10, 20],
        }
        if filename is None:
            return copy.deepcopy(default_scan_conf)
        with open(filename) as infile:
            # safe_load: plain yaml.load without a Loader can construct
            # arbitrary objects and is rejected by recent PyYAML versions
            conf = yaml.safe_load(infile)
        if conf is None:
            # an empty file parses to None; report it as missing keys below
            conf = {}
        if not isinstance(conf, dict):
            raise ValueError(
                f"scan_config file {filename} should contain a mapping of keys to values")
        # convert keys to lower case
        conf = {k.lower(): v for k, v in conf.items()}
        # check redundant and missing keys
        diff1 = default_scan_conf.keys() - conf.keys()
        if diff1:
            raise ValueError(
                f"Keys missing in scan_config file {filename}:\n {diff1}")
        diff2 = conf.keys() - default_scan_conf.keys()
        if diff2:
            print(
                f"Warning: Keys in scan_config file {filename} are ignored:\n {diff2}"
            )
        return conf

    def fb_molecule_to_qc_molecule(self, fb_molecule):
        """ Convert an forcebalance.molecule.Molecule object to a qcportal.Molecule object

        Only the first frame (``xyzs[0]``) is converted; coordinates are
        passed on with units declared as angstrom.
        """
        e_idxs = [Elements.index(i) for i in fb_molecule.elem]
        coords = fb_molecule.xyzs[0]
        # one row per atom: [element index, x, y, z]
        moldata = [[ei] + coord.tolist() for ei, coord in zip(e_idxs, coords)]
        return ptl.Molecule.from_data(moldata, dtype="numpy", units="angstrom")

    def qc_molecule_to_fb_molecule(self, qc_molecule):
        """ Convert an qcportal.Molecule object to a forcebalance.molecule.Molecule object

        The geometry is scaled by ``bohr2ang`` and stored as a single frame.
        """
        m = Molecule()
        m.elem = [Elements[i] for i in qc_molecule.atomic_numbers]
        m.xyzs = [qc_molecule.geometry * bohr2ang]
        return m

    def master(self):
        """ Run the full workflow: optimize, scan, compute hessian, write targets """
        # submit initial optimization
        job_id = self.submit_single_optimization(self.qc_mol)
        self.wait_jobs([job_id])
        self.qc_mol = self.get_optimized_molecule(job_id)
        # submit bond stretching jobs
        grid_opt_jobs = self.submit_bond_streching_jobs()
        # submit angle bending jobs
        grid_opt_jobs += self.submit_angle_bending_jobs()
        # submit vibrational hessian jobs
        hessian_job = self.submit_vib_hessian_jobs()
        # wait for grid_opt_jobs to finish
        self.wait_jobs(grid_opt_jobs)
        # collect grid optimization job data
        gresult = self.get_grid_optimiztion_results(grid_opt_jobs)
        # flatten the result dict into a list of {'energy': xxx, 'molecule': xxx} records
        flat_records = [
            record for job_res in gresult.values()
            for record in job_res.values()
        ]
        self.write_fb_target_abinitio(flat_records)
        # wait for hessian job to finish
        self.wait_jobs([hessian_job], jobtype='compute')
        # collect hessian job data
        hessian_result = self.get_hessian_result(hessian_job)
        self.write_fb_target_hessian(hessian_result)
        # finish

    def submit_single_optimization(self, qc_mol):
        """ Submit one geometry optimization job for qc_mol

        Parameters
        ----------
        qc_mol: qcportal Molecule
            The molecule to optimize.

        Returns
        -------
        job_id
            The id of the submitted optimization procedure.
        """
        options = {
            "keywords": None,
            "qc_spec": {
                "driver": "gradient",
                "method": self.scan_conf['qm_method'],
                "basis": self.scan_conf['qm_basis'],
                "keywords": None,
                "program": "psi4"
            },
        }
        print("Submitting 1 initial optimization job")
        # submit the molecule that was passed in, not unconditionally self.qc_mol
        mol_ret = self.client.add_molecules([qc_mol])
        r = self.client.add_procedure("optimization", "geometric", options,
                                      mol_ret)
        assert len(r.ids) == 1
        return r.ids[0]

    def _submit_grid_opt_jobs(self, scan_type, indices_list, steps, description):
        """ Submit one relative-step grid optimization service per entry of indices_list

        Parameters
        ----------
        scan_type: str
            Value of the scan "type" field ("distance" or "angle").
        indices_list: iterable
            Atom index tuples, one scan job is created for each.
        steps: list
            Relative steps of the scanned coordinate.
        description: str
            Words used in the progress message.

        Returns
        -------
        ids
            The ids returned by the server for the submitted services.
        """
        mol_id = self.client.add_molecules([self.qc_mol])[0]
        all_job_options = []
        for indices in indices_list:
            # a fresh dict per job so the specs never share nested state
            all_job_options.append({
                "keywords": {
                    "preoptimization": False,
                    "scans": [{
                        "type": scan_type,
                        "indices": list(indices),
                        "steps": steps,
                        "step_type": "relative"
                    }]
                },
                "optimization_spec": {
                    "program": "geometric",
                    "keywords": {
                        "coordsys": "tric",
                        "enforce": 0.1,
                        "reset": True,
                        "qccnv": True,
                        "epsilon": 0.0,
                    }
                },
                "qc_spec": {
                    "driver": "gradient",
                    "method": self.scan_conf['qm_method'],
                    "basis": self.scan_conf['qm_basis'],
                    "keywords": None,
                    "program": "psi4",
                },
                "initial_molecule": mol_id,
            })
        print(
            f"Submitting {len(all_job_options)} {description} grid opt jobs")
        r = self.client.add_service(all_job_options)
        return r.ids

    def submit_bond_streching_jobs(self):
        """ Submit a distance scan for every bond in self.m, return the job ids """
        return self._submit_grid_opt_jobs(
            "distance", self.m.bonds, self.scan_conf['bond_steps'],
            "bond streching")

    def submit_angle_bending_jobs(self):
        """ Submit an angle scan for every angle found in self.m, return the job ids """
        return self._submit_grid_opt_jobs(
            "angle", self.m.find_angles(), self.scan_conf['angle_steps'],
            "angle bending")

    def submit_vib_hessian_jobs(self):
        """ Submit a hessian calculation for self.qc_mol, return its job id """
        mol_id = self.client.add_molecules([self.qc_mol])[0]
        # submit a hessian job to the server
        method = self.scan_conf['qm_method']
        basis = self.scan_conf['qm_basis']
        r = self.client.add_compute("psi4", method, basis, "hessian", None,
                                    mol_id)
        assert len(r.ids) == 1
        return r.ids[0]

    def get_optimized_molecule(self, job_id):
        """ Get the optimized molecule from a finished optimization job """
        qr = self.client.query_procedures(id=job_id)[0]
        return qr.get_final_molecule()

    def get_grid_optimiztion_results(self, grid_opt_jobs):
        """ Get results of a list of grid optimization jobs
        Return a dictionary in this format:
        {
            grid_opt_job_1: {
                grid_id_1: {
                    'energy': -140.41241,
                    'molecule': qcMol1,
                },
                grid_id_2: {
                    'energy': -140.23241,
                    'molecule': qcMol2,
                }
            },
            grid_opt_job_2: { ... },
            ...
        }
        """
        res = {}
        for job_id in grid_opt_jobs:
            qr = self.client.query_procedures(id=job_id)[0]
            assert qr.status == 'COMPLETE', f'Job {job_id} should be complete, but it is {qr.status.value}'
            # get final energies and geometries for each grid
            energy_dict = qr.get_final_energies()
            molecule_dict = qr.get_final_molecules()
            assert set(energy_dict) == set(
                molecule_dict
            ), "Keys of energy_dict and molecule_dict should be the same"
            # sorted so the grid points of each job come out in a stable order
            res[job_id] = {
                key: {
                    'energy': energy_dict[key],
                    'molecule': molecule_dict[key],
                }
                for key in sorted(energy_dict)
            }
        return res

    def get_hessian_result(self, hessian_job_id):
        """ Get the data from a hessian job

        Returns
        -------
        dict
            {'molecule': qcMol, 'hessian': (3N, 3N) array}, N = self.m.na
        """
        qr = self.client.query_results(id=hessian_job_id)[0]
        hessian = np.array(qr.return_result, dtype=float)
        # reshape hessian into a matrix, also check the dimensions
        # (reshape raises if the number of elements is not (3N)^2)
        n_of_a = self.m.na
        hessian = hessian.reshape(n_of_a * 3, n_of_a * 3)
        # get the qcMol
        qcmol = self.client.query_molecules(id=qr.molecule)[0]
        # return a dictionary
        return {'molecule': qcmol, 'hessian': hessian}

    def write_fb_target_abinitio(self, records):
        """ Write a list of {'energy': xxx, 'molecule': xxx, 'name': xxx} records into a new target folder

        Creates <out_folder>/abinitio_bond_angles containing traj.xyz and
        qdata.txt. 'name' is optional in each record. The current working
        directory is restored before returning.
        """
        # prepare folder for writing
        target_name = 'abinitio_bond_angles'
        target_folder = os.path.join(self.out_folder, target_name)
        os.mkdir(target_folder)
        # load data into a fb Molecule
        out_m = Molecule()
        out_m.elem = self.m.elem.copy()
        out_m.xyzs = []
        out_m.qm_energies = []
        out_m.comms = []
        for record in records:
            qcmol = record['molecule']
            energy = record['energy']
            name = record.get('name', 'created by FBTargetBuilder')
            m = self.qc_molecule_to_fb_molecule(qcmol)
            assert m.elem == out_m.elem, 'Elements list of resulting qcmol is not consistent with self.m'
            # append geometry
            out_m.xyzs.append(m.xyzs[0])
            # append energy
            out_m.qm_energies.append(energy)
            # append name
            out_m.comms.append(name)
        # write output from inside the target folder, then go back so the
        # caller's working directory is not changed as a side effect
        previous_dir = os.getcwd()
        os.chdir(target_folder)
        try:
            print(
                f"Writing {len(records)} frames into targets/abinitio_bond_angles/traj.xyz"
            )
            out_m.write('traj.xyz')
            print(
                f"Writing {len(records)} frames into targets/abinitio_bond_angles/qdata.txt"
            )
            out_m.write('qdata.txt')
        finally:
            os.chdir(previous_dir)

    def write_fb_target_hessian(self, record):
        """ Write a {'molecule': xxx, 'hessian': xxx} record into a new target folder

        Creates <out_folder>/abinitio_hessian containing geo.xyz and the
        hessian matrix saved by np.save. The current working directory is
        restored before returning.
        """
        # prepare folder for writing
        target_name = 'abinitio_hessian'
        target_folder = os.path.join(self.out_folder, target_name)
        os.mkdir(target_folder)
        previous_dir = os.getcwd()
        os.chdir(target_folder)
        try:
            # load data into a fb Molecule
            qcmol = record['molecule']
            out_m = self.qc_molecule_to_fb_molecule(qcmol)
            out_m.write('geo.xyz')
            # write hessian matrix
            hessian = record['hessian']
            np.save('hessian', hessian)
        finally:
            os.chdir(previous_dir)

    def wait_jobs(self,
                  job_ids,
                  jobtype='procedure',
                  time_interval=5,
                  verbose=True):
        """ Poll the server until every job in job_ids is COMPLETE

        Parameters
        ----------
        job_ids: list
            Ids of the jobs to wait for.
        jobtype: str
            'procedure' (queried with query_procedures) or 'compute'
            (queried with query_results).
        time_interval: float
            Seconds to sleep between two polls.
        verbose: bool
            If True, print a status summary after each poll.
        """
        assert jobtype in ['compute', 'procedure']
        if jobtype == 'procedure':
            query = self.client.query_procedures
        else:
            query = self.client.query_results
        while True:
            d_status = collections.defaultdict(int)
            for job_id in job_ids:
                r = query(id=job_id)[0]
                status = r.status.value  # get string value from RecordStatusEnum
                if r.status == 'ERROR':
                    print(f"Error found in job {job_id}")
                    err = r.get_error()
                    if err is not None:
                        print("Error message:")
                        print(err.err_message)
                d_status[status] += 1
            if verbose:
                print(' | '.join(f'{status}:{d_status[status]}'
                                 for status in d_status))
            # check if all jobs finished
            # NOTE: an errored job is only reported; the loop keeps polling
            # until every job reports COMPLETE
            if d_status['COMPLETE'] == len(job_ids):
                break
            time.sleep(time_interval)
        print()
if not is_backbone(dac): for mult in range(1, 7): dstr = "#define torsion_%s_%s_%s_%s_%s_mult%i 0.0 0.0 %i" % (AA, dac[0], dac[1], dac[2], dac[3], mult, mult) if dstr not in aadac[AA]: aadac[AA].append(dstr) print dstr if is_backbone(dac): if args.bk and dac not in alldac: alldac.append(dac[:]) alldac.append(dac[::-1]) elif args.sd and dac not in alldac: alldac.append(dac[:]) alldac.append(dac[::-1]) else: print dac, "is not parameterized" for a in M.find_angles(): if all([M.atomname[i] in anac for i in a]): aac = [anac[M.atomname[i]] for i in a] if args.an and aac not in allaac: allaac.append(aac[:]) allaac.append(aac[::-1]) for b in M.bonds: if all([M.atomname[i] in anac for i in b]): bac = [anac[M.atomname[i]] for i in b] if args.bd and bac not in allbac: allbac.append(bac[:]) allbac.append(bac[::-1]) mode = 'N' foutnm = '%s%s%s' % (os.path.splitext(sys.argv[1])[0], label, os.path.splitext(sys.argv[1])[1]) print "Output file is", foutnm
mult, mult, ) if dstr not in aadac[AA]: aadac[AA].append(dstr) print dstr if is_backbone(dac): if args.bk and dac not in alldac: alldac.append(dac[:]) alldac.append(dac[::-1]) elif args.sd and dac not in alldac: alldac.append(dac[:]) alldac.append(dac[::-1]) else: print dac, "is not parameterized" for a in M.find_angles(): if all([M.atomname[i] in anac for i in a]): aac = [anac[M.atomname[i]] for i in a] if args.an and aac not in allaac: allaac.append(aac[:]) allaac.append(aac[::-1]) for b in M.bonds: if all([M.atomname[i] in anac for i in b]): bac = [anac[M.atomname[i]] for i in b] if args.bd and bac not in allbac: allbac.append(bac[:]) allbac.append(bac[::-1]) mode = "N" foutnm = "%s%s%s" % (os.path.splitext(sys.argv[1])[0], label, os.path.splitext(sys.argv[1])[1]) print "Output file is", foutnm