def write_fb_target_abinitio(self, records):
    """
    Write a list of {'energy': xxx, 'molecule': xxx, 'name': xxx} records
    into a new target folder.

    The folder ``abinitio_bond_angles`` is created under ``self.out_folder``
    and becomes the current working directory (it is not restored on return).
    Every record contributes one frame: its geometry, its energy and its
    name (``'created by FBTargetBuilder'`` when the record has no 'name').
    The frames are written to ``traj.xyz`` and ``qdata.txt`` in that folder.

    An AssertionError is raised when the element list of a converted
    molecule differs from ``self.m.elem``.
    """
    # create the destination folder and work from inside it
    folder_name = 'abinitio_bond_angles'
    destination = os.path.join(self.out_folder, folder_name)
    os.mkdir(destination)
    os.chdir(destination)

    # one ForceBalance Molecule collects every frame
    combined = Molecule()
    combined.elem = self.m.elem.copy()
    combined.xyzs = []
    combined.qm_energies = []
    combined.comms = []
    for rec in records:
        source_mol = rec['molecule']
        frame_energy = rec['energy']
        frame_label = rec.get('name', 'created by FBTargetBuilder')
        converted = self.qc_molecule_to_fb_molecule(source_mol)
        assert converted.elem == combined.elem, 'Elements list of resulting qcmol is not consistent with self.m'
        # geometry, energy and comment stay aligned frame by frame
        combined.xyzs.append(converted.xyzs[0])
        combined.qm_energies.append(frame_energy)
        combined.comms.append(frame_label)

    # dump the collected frames in both formats
    n_frames = len(records)
    print(
        f"Writing {n_frames} frames into targets/abinitio_bond_angles/traj.xyz"
    )
    combined.write('traj.xyz')
    print(
        f"Writing {n_frames} frames into targets/abinitio_bond_angles/qdata.txt"
    )
    combined.write('qdata.txt')
def gather_generations():
    """
    Load ``shots.gro`` and attach the QM data stored in ``qdata.txt``.

    Both files are read from the current working directory. The coordinates
    of the two files are compared first; the QM energies, forces and ESP
    data of ``qdata.txt`` are then copied onto the molecule loaded from
    ``shots.gro``.

    Returns:
        The molecule loaded from ``shots.gro`` with ``qm_energies``,
        ``qm_forces``, ``qm_espxyzs`` and ``qm_espvals`` taken from
        ``qdata.txt``.

    Raises:
        Exception: if the coordinate arrays have different shapes, or if any
            coordinate component differs by more than 1e-4.
    """
    shots = Molecule('shots.gro')
    qdata = Molecule('qdata.txt')
    shots_xyzs = np.array(shots.xyzs)
    qdata_xyzs = np.array(qdata.xyzs)
    if shots_xyzs.shape != qdata_xyzs.shape:
        raise Exception('shots.gro and qdata.txt appear to contain different data')
    if np.max(np.abs(shots_xyzs - qdata_xyzs)) > 1e-4:
        raise Exception('shots.gro and qdata.txt appear to contain different xyz coordinates')
    shots.qm_energies = qdata.qm_energies
    shots.qm_forces = qdata.qm_forces
    shots.qm_espxyzs = qdata.qm_espxyzs
    shots.qm_espvals = qdata.qm_espvals
    # The former "First" flag was always True, so its accumulation branch was
    # unreachable; the single loaded molecule is the result.
    return shots
def write_molecule_files(molecule_data_list):
    """
    Write the input files describing one molecule and its scanned frames.

    Files are written into the current working directory:
    ``input.mol2`` (via oechem), ``conf.pdb`` (converted from the mol2 file),
    ``coords.xyz`` and ``qdata.txt`` (all frames with their energies).

    Args:
        molecule_data_list: non-empty sequence of ``(molecule, energy)``
            pairs. The first molecule provides the topology and the element
            list; every pair contributes one frame.

    Raises:
        IndexError: if ``molecule_data_list`` is empty.
    """
    molecule, _ = molecule_data_list[0]
    qcjson_mol = molecule.dict(encoding='json')
    oemol = cmiles.utils.load_molecule(qcjson_mol)
    # write the mol2 file using oechem; `ofs` is the module-level output
    # stream, so make sure it is closed even when writing fails
    ofs.open('input.mol2')
    try:
        oechem.OEWriteMolecule(ofs, oemol)
    finally:
        ofs.close()
    # write the pdb file using ForceBalance Molecule
    fbmol = Molecule('input.mol2')
    fbmol.write('conf.pdb')
    # write xyz file using a new ForceBalance Molecule object
    m = Molecule()
    m.elem = [Elements[i] for i in molecule.atomic_numbers]
    # geometries are scaled by bohr2ang (presumably Bohr -> Angstrom)
    m.xyzs = [mol.geometry * bohr2ang for mol, _ in molecule_data_list]
    m.qm_energies = [energy for _, energy in molecule_data_list]
    m.write("coords.xyz")
    # write qdata.txt file with coords and energies
    m.write('qdata.txt')
def gen_tid_calculated_molecules_list(torsiondrive_data, forcefield, verbose=False):
    """
    Group the usable torsion scans by the torsion parameter id they probe.

    Each entry is written to a scratch folder ``tmp`` (created in the current
    working directory, replacing any existing one) as ``input.mol2`` and
    ``scan.xyz``. Entries are skipped when ``test_ff_mol2`` reports a failure
    or when ``check_Hbond`` reports internal hydrogen bonds.

    Args:
        torsiondrive_data: mapping of entry label to torsiondrive data. Each
            value is read through the keys 'initial_molecules',
            'final_molecules', 'final_energies', 'final_gradients',
            'attributes' and 'keywords'.
        forcefield: force field object providing
            ``get_parameter_handler('ProperTorsions').parameters``.
        verbose: when truthy, print the reason for every skipped entry.

    Returns:
        A tuple ``(tid_calculated_molecules_list, molecules_list_dict_from_td)``.
        The first maps every ProperTorsions parameter id of the force field
        to a list of ``{'mol_index': ..., 'indices': ...}`` dicts. The second
        maps each ``mol_index`` (the 'canonical_isomeric_smiles' attribute)
        to a list of reduced qcschema molecule dicts.
    """
    # gen dictionary with keys, including all tids in the input forcefield
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    tid_calculated_molecules_list = {
        torsion_param.id: [] for torsion_param in ff_torsion_param_list
    }
    # plain dict; the previous chained assignment also rebound `defaultdict`
    molecules_list_dict_from_td = {}
    schema_keys = ('symbols', 'geometry', 'connectivity', 'molecular_charge',
                   'molecular_multiplicity')

    if os.path.exists('tmp'):
        shutil.rmtree('tmp')
    os.mkdir('tmp')
    start_dir = os.getcwd()
    os.chdir('tmp')
    try:
        for td_data in torsiondrive_data.values():
            # pick a single initial molecule
            qcmol = td_data['initial_molecules'][0]
            # write input.mol2 file
            qcjson_mol = qcmol.dict(encoding='json')
            oemol = cmiles.utils.load_molecule(qcjson_mol)
            ofs.open('input.mol2')
            try:
                oechem.OEWriteMolecule(ofs, oemol)
            finally:
                ofs.close()
            # test mol2 file
            success, _msg, molecule_labels = test_ff_mol2(forcefield, 'input.mol2')
            if not success:
                if verbose:
                    print(
                        'Error occured while testing input.mol2. Excluded in tid_calculated_molecules_list. '
                    )
                continue

            # check if the torsion scan contains one or more conformers
            # forming strong internal H bonds
            fbmol = FBMolecule('input.mol2')
            # list of grid ids sorted
            sorted_grid_ids = sorted(td_data['final_molecules'].keys())
            # write scan.xyz
            target_mol = FBMolecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted_grid_ids:
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                geo = grid_qc_mol.geometry * 0.529177
                target_mol.xyzs.append(geo)
                # add energy and gradient
                target_mol.qm_energies.append(
                    td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')
            no_hbonds = check_Hbond(scan_fnm='scan.xyz', top_fnm='input.mol2')
            if not no_hbonds:
                if verbose:
                    print(
                        'Internal hydrogen bond detacted. Excluded in tid_calculated_molecules_list. '
                    )
                continue

            mol_index = td_data['attributes']["canonical_isomeric_smiles"]
            # only the first scanned dihedral identifies the torsion parameter
            indices = td_data['keywords']['dihedrals'][0]
            tid = molecule_labels['ProperTorsions'][tuple(indices)].id
            tid_calculated_molecules_list[tid].append({
                'mol_index': mol_index,
                'indices': indices
            })
            qcschema_molecules = []
            for initial_mol in td_data['initial_molecules']:
                j_dict = initial_mol.dict(encoding='json')
                qcschema_molecules.append(
                    {key: j_dict[key] for key in schema_keys})
            molecules_list_dict_from_td[mol_index] = qcschema_molecules
    finally:
        # never leave the caller stranded inside the scratch folder
        os.chdir(start_dir)

    print("\n## Available torsion scans from QCArchive ##\n" + '-' * 90)
    print(f"{'idx':<7} {'tid':7s} {'Number of torsion scans'}")
    for idx, (tid, molecules_list) in enumerate(
            tid_calculated_molecules_list.items()):
        if molecules_list:
            print(f'{idx:<7} {tid:7s} {len(molecules_list)}')
    print('-' * 90)
    shutil.rmtree('tmp')
    return tid_calculated_molecules_list, molecules_list_dict_from_td
def make_fb_targets():
    """
    Build one ForceBalance target folder per result folder.

    Reads the module-level settings ``results_folder``, ``molecules_folder``,
    ``out_folder`` and ``target_str``. ``out_folder`` is deleted and
    recreated. For every sub-folder ``<name>`` of ``results_folder`` that
    contains at least one ``.xyz`` file, the folder ``td_<name>`` is created
    under ``out_folder`` holding ``qdata.txt``, ``scan.xyz``, ``conf.pdb``,
    ``input.mol2`` and ``notes.txt``. A ``targets.in`` file listing all
    targets is finally written into ``out_folder``.

    Each ``<scan>.xyz`` file is expected to have a matching
    ``<scan>.gradxyz`` file, and the last whitespace-separated token of every
    xyz comment line is parsed as the frame energy.
    """
    result_mol_folders = [
        os.path.join(results_folder, f) for f in os.listdir(results_folder)
        if os.path.isdir(os.path.join(results_folder, f))
    ]
    result_mol_folders.sort()
    print(
        f"\nLoading data from {len(result_mol_folders)} result folders under {results_folder}"
    )
    # output folder
    if os.path.exists(out_folder):
        shutil.rmtree(out_folder)
    os.mkdir(out_folder)
    target_names = []
    for mol_folder in result_mol_folders:
        mol_name = os.path.basename(mol_folder)
        # the name of the molecules should be consistent with the mol_folder
        mol_file = os.path.join(molecules_folder, mol_name + '.mol2')
        molecule = Molecule(mol_file)
        # find all torsion data; os.listdir returns entries in arbitrary
        # order, so sort to make the frame order of the output reproducible
        finished_scans = sorted(
            name
            for name, ext in map(os.path.splitext, os.listdir(mol_folder))
            if ext == '.xyz')
        if not finished_scans:
            print(f'No finished scans found in {mol_folder}')
            continue
        # output target name
        target_name = 'td_' + mol_name
        target_names.append(target_name)
        # make target folder
        this_target_folder = os.path.join(out_folder, target_name)
        os.mkdir(this_target_folder)
        # read data from each finished scans
        target_mol = Molecule()
        target_mol.elem = molecule.elem
        target_mol.xyzs = []
        target_mol.qm_energies = []
        target_mol.qm_grads = []
        for scan_name in finished_scans:
            xyz_file = os.path.join(mol_folder, scan_name + '.xyz')
            m = Molecule(xyz_file)
            target_mol.xyzs += m.xyzs
            # read energy from comment line
            energies = [float(comm.split()[-1]) for comm in m.comms]
            target_mol.qm_energies += energies
            # read gradient
            grad_file = os.path.join(mol_folder, scan_name + '.gradxyz')
            grads = read_gradxyz(grad_file)
            target_mol.qm_grads += grads
        # write qdata.txt
        target_mol.write(os.path.join(this_target_folder, 'qdata.txt'))
        # write scan.xyz
        target_mol.write(os.path.join(this_target_folder, 'scan.xyz'))
        # write pdb
        molecule.write(os.path.join(this_target_folder, 'conf.pdb'))
        # copy mol2 file
        shutil.copyfile(mol_file,
                        os.path.join(this_target_folder, 'input.mol2'))
        # write a note recording which input files produced this target
        with open(os.path.join(this_target_folder, 'notes.txt'), 'w') as fnote:
            fnote.write(
                "Notes: This target is made by make_fb_targets.py, using data from\n"
            )
            fnote.write(mol_file + '\n')
            for scan_name in finished_scans:
                xyz_file = os.path.join(mol_folder, scan_name + '.xyz')
                grad_file = os.path.join(mol_folder, scan_name + '.gradxyz')
                fnote.write(xyz_file + '\n')
                fnote.write(grad_file + '\n')
    # write a target.in file for use in ForceBalance input
    with open(os.path.join(out_folder, 'targets.in'), 'w') as fout:
        for tname in target_names:
            fout.write(target_str.format(name=tname) + '\n')
    print("Targets generation finished!")
    print(
        f"You can copy contents in {os.path.join(out_folder, 'targets.in')} to your ForceBalance input file."
    )
def _archive_failed_target(dataset_name, target_name, entry_index, td_data, msg):
    """
    Move a rejected target's input.mol2 plus an error note to ../error_mol2s.

    Must be called with the target folder as the working directory. On
    return the working directory is the parent folder and the target folder
    has been deleted.
    """
    if not os.path.exists('../error_mol2s'):
        os.mkdir('../error_mol2s')
    shutil.move('input.mol2', f'../error_mol2s/{target_name}.mol2')
    with open(f'../error_mol2s/{target_name}_error.txt', 'w') as notefile:
        notefile.write(f'{dataset_name}\ntarget_name {target_name}\n')
        notefile.write(
            f'entry {entry_index}\ntd_keywords {td_data["keywords"]}\n'
        )
        notefile.write(f'error message:\n{msg}')
    # remove this folder
    os.chdir('..')
    shutil.rmtree(target_name)


def make_torsiondrive_target(dataset_name, torsiondrive_data, test_ff=None):
    """
    Make a folder of ForceBalance targets from the torsiondrive data.

    A fresh ``targets`` folder is created in the current working directory
    (replacing any existing one). Every entry gets its own sub-folder with
    ``note.txt``, ``input.mol2``, ``conf.pdb``, ``scan.xyz``, ``qdata.txt``
    and ``metadata.json``. Entries that fail ``test_ff_mol2`` (only run when
    ``test_ff`` is given) or that ``screening_Hbond`` reports as containing
    internal hydrogen bonds are removed again; their mol2 file and an error
    note are kept under ``targets/error_mol2s``. A
    ``targets.<dataset_name>.in`` file listing the kept targets is written
    into ``targets``.

    Args:
        dataset_name: name of the dataset; spaces are replaced by
            underscores in folder and file names.
        torsiondrive_data: mapping of entry label to torsiondrive data. Each
            value is read through the keys 'initial_molecules',
            'final_molecules', 'final_energies', 'final_gradients',
            'attributes' and 'keywords'.
        test_ff: optional force field passed to ``test_ff_mol2``; when given,
            the SMIRKS patterns and ids of the scanned torsions are stored
            in the metadata.

    The working directory is restored on return, also when an error occurs.
    """
    target_name_prefix = 'td_' + dataset_name.replace(' ', '_')
    # create new targets folder
    if os.path.exists('targets'):
        shutil.rmtree('targets')
    os.mkdir('targets')
    start_dir = os.getcwd()
    os.chdir('targets')
    try:
        # write each entry as an individual target
        n_targets = len(torsiondrive_data)
        idx_fmt_string = get_int_fmt_string(n_targets)
        target_names = []
        print(f"Generating {n_targets} targets")
        # every entry consumes an index, including entries rejected below
        for target_idx, (entry_index, td_data) in enumerate(
                torsiondrive_data.items()):
            # pick a single initial molecule
            qcmol = td_data['initial_molecules'][0]
            # get mol_formula
            mol_formula = qcmol.get_molecular_formula()
            # create target folder
            target_idx_str = idx_fmt_string.format(target_idx)
            target_name = f"{target_name_prefix}_{target_idx_str}_{mol_formula}"
            print(f"{target_idx}: {target_name}")
            os.mkdir(target_name)
            os.chdir(target_name)
            # write a note
            with open('note.txt', 'w') as notefile:
                notefile.write(
                    f'Target generated from dataset {dataset_name}, entry {entry_index}'
                )
            # write input.mol2 file
            qcjson_mol = qcmol.dict(encoding='json')
            oemol = cmiles.utils.load_molecule(qcjson_mol)
            ofs.open('input.mol2')
            try:
                oechem.OEWriteMolecule(ofs, oemol)
            finally:
                ofs.close()
            # test mol2 file
            success = True
            msg = None
            molecule_labels = None
            if test_ff is not None:
                success, msg, molecule_labels = test_ff_mol2(test_ff, 'input.mol2')
            if not success:
                _archive_failed_target(dataset_name, target_name, entry_index,
                                       td_data, msg)
                continue

            # write conf.pdb file
            fbmol = Molecule('input.mol2')
            fbmol.write('conf.pdb')
            # list of grid ids sorted
            sorted_grid_ids = sorted(td_data['final_molecules'].keys())
            # write scan.xyz and qdata.txt files
            target_mol = Molecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted_grid_ids:
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                geo = grid_qc_mol.geometry * 0.529177
                target_mol.xyzs.append(geo)
                # add energy and gradient
                target_mol.qm_energies.append(
                    td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')
            target_mol.write('qdata.txt')
            # check if the torsion scan contains one or more conformers
            # forming strong internal H bonds
            no_hbonds, _hbonds = screening_Hbond(mol2_fnm='input.mol2',
                                                 scan_fnm='scan.xyz')
            if not no_hbonds:
                _archive_failed_target(dataset_name, target_name, entry_index,
                                       td_data,
                                       'One or more internal H bonds exist.')
                continue

            # pick metadata to write into the metadata.json file
            metadata = copy.deepcopy(td_data['keywords'])
            metadata['dataset_name'] = dataset_name
            metadata['entry_label'] = entry_index
            metadata['canonical_smiles'] = td_data['attributes'].get(
                'canonical_smiles', 'unknown')
            metadata['torsion_grid_ids'] = sorted_grid_ids
            # find SMIRKs for torsion being scaned if test_ff is provided
            if test_ff is not None:
                metadata['smirks'] = []
                metadata['smirks_ids'] = []
                for torsion_indices in td_data['keywords']['dihedrals']:
                    param = molecule_labels['ProperTorsions'][tuple(
                        torsion_indices)]
                    metadata['smirks'].append(param.smirks)
                    metadata['smirks_ids'].append(param.id)
            with open('metadata.json', 'w') as jsonfile:
                json.dump(metadata, jsonfile, indent=2)
            # finish this target
            target_names.append(target_name)
            os.chdir('..')

        # write targets.{dataset_name}.in file
        target_in_fnm = f"targets.{dataset_name.replace(' ', '_')}.in"
        with open(target_in_fnm, 'w') as outfile:
            for target_name in target_names:
                outfile.write(target_in_str.format(name=target_name))
        print(f"Successfull generated {len(target_names)} targets.")
        print(
            f"You can copy contents in {target_in_fnm} to your ForceBalance input file."
        )
    finally:
        # leave the caller where it started, even after a failure mid-way
        os.chdir(start_dir)