예제 #1
0
 def write_fb_target_abinitio(self, records):
     """Write ab initio records into a new ForceBalance target folder.

     Creates the folder ``abinitio_bond_angles`` under ``self.out_folder``,
     collects one frame per record into a single ``Molecule`` and writes it
     out as ``traj.xyz`` and ``qdata.txt`` inside that folder.

     Args:
         records: iterable of dicts, each with the keys ``'molecule'``
             (passed to ``self.qc_molecule_to_fb_molecule``) and
             ``'energy'``, plus an optional ``'name'`` that becomes the
             frame comment (default ``'created by FBTargetBuilder'``).

     Raises:
         FileExistsError: if the target folder already exists (``os.mkdir``).
         AssertionError: if a converted molecule's element list differs from
             ``self.m.elem``.

     Note:
         The process working directory is changed to the target folder and
         is not restored here.
         NOTE(review): callers may rely on that side effect, so it is kept
         as-is -- confirm before changing.
     """
     # prepare folder for writing
     target_name = 'abinitio_bond_angles'
     target_folder = os.path.join(self.out_folder, target_name)
     os.mkdir(target_folder)
     os.chdir(target_folder)
     # accumulate all frames into one fb Molecule sharing self.m's elements
     out_m = Molecule()
     out_m.elem = self.m.elem.copy()
     out_m.xyzs = []
     out_m.qm_energies = []
     out_m.comms = []
     for record in records:
         qcmol = record['molecule']
         energy = record['energy']
         name = record.get('name', 'created by FBTargetBuilder')
         m = self.qc_molecule_to_fb_molecule(qcmol)
         # Explicit raise instead of `assert`: assert statements are removed
         # under `python -O`, which would silently skip this data check.
         if m.elem != out_m.elem:
             raise AssertionError(
                 'Elements list of resulting qcmol is not consistent with self.m'
             )
         # only the first frame of the converted molecule is used
         out_m.xyzs.append(m.xyzs[0])
         out_m.qm_energies.append(energy)
         out_m.comms.append(name)
     # write output (relative names: the cwd is the target folder)
     print(
         f"Writing {len(records)} frames into targets/abinitio_bond_angles/traj.xyz"
     )
     out_m.write('traj.xyz')
     print(
         f"Writing {len(records)} frames into targets/abinitio_bond_angles/qdata.txt"
     )
     out_m.write('qdata.txt')
예제 #2
0
def gather_generations():
    """Load ``shots.gro`` and ``qdata.txt`` and attach the QM data to the shots.

    Both files are read from the current working directory. Their
    coordinates must have the same array shape and agree to within 1e-4
    (in the units the files are read in) before the QM energies, forces and
    ESP data from ``qdata.txt`` are copied onto the ``shots.gro`` molecule.

    Returns:
        The ``Molecule`` loaded from ``shots.gro`` with ``qm_energies``,
        ``qm_forces``, ``qm_espxyzs`` and ``qm_espvals`` taken from
        ``qdata.txt``.

    Raises:
        Exception: if the two files hold differently shaped coordinate
            arrays, or coordinates that differ by more than 1e-4.
    """
    shots = Molecule('shots.gro')
    qdata = Molecule('qdata.txt')
    A1 = np.array(shots.xyzs)
    A2 = np.array(qdata.xyzs)
    if A1.shape != A2.shape:
        raise Exception('shots.gro and qdata.txt appear to contain different data')
    elif np.max(np.abs((A1 - A2).flatten())) > 1e-4:
        raise Exception('shots.gro and qdata.txt appear to contain different xyz coordinates')
    shots.qm_energies = qdata.qm_energies
    shots.qm_forces = qdata.qm_forces
    shots.qm_espxyzs = qdata.qm_espxyzs
    shots.qm_espvals = qdata.qm_espvals
    # The former `First = True / if First / else: All += shots` scaffold had an
    # unreachable else-branch; with a single data set the result is `shots`.
    return shots
def write_molecule_files(molecule_data_list):
    """Write the molecule input files for one target into the current folder.

    Produces ``input.mol2`` and ``conf.pdb`` from the first molecule, then
    ``coords.xyz`` and ``qdata.txt`` containing every geometry/energy pair.

    Args:
        molecule_data_list: non-empty sequence of ``(molecule, energy)``
            pairs. Each molecule must provide ``dict(encoding='json')``,
            ``atomic_numbers`` and ``geometry``.

    Raises:
        IndexError: if ``molecule_data_list`` is empty.

    Note:
        ``ofs``, ``Elements`` and ``bohr2ang`` are module-level names defined
        outside this function. ``ofs`` is presumably an
        ``oechem.oemolostream`` -- TODO confirm.
    """
    # the first entry supplies the topology for the mol2/pdb files
    molecule, _ = molecule_data_list[0]
    qcjson_mol = molecule.dict(encoding='json')
    oemol = cmiles.utils.load_molecule(qcjson_mol)
    # write the mol2 file using oechem; always close the shared stream, even
    # if the write raises, so the next caller does not inherit an open file
    ofs.open('input.mol2')
    try:
        oechem.OEWriteMolecule(ofs, oemol)
    finally:
        ofs.close()
    # write the pdb file using ForceBalance Molecule
    fbmol = Molecule('input.mol2')
    fbmol.write('conf.pdb')
    # write xyz file using a new ForceBalance Molecule object
    m = Molecule()
    m.elem = [Elements[i] for i in molecule.atomic_numbers]
    # geometries are scaled by bohr2ang (presumably Bohr -> Angstrom)
    m.xyzs = [mol.geometry * bohr2ang for mol, _ in molecule_data_list]
    m.qm_energies = [energy for _, energy in molecule_data_list]
    m.write("coords.xyz")
    # write qdata.txt file with coords and energies
    m.write('qdata.txt')
def gen_tid_calculated_molecules_list(torsiondrive_data,
                                      forcefield,
                                      verbose=False):
    """Group usable torsion scans by the torsion parameter id they exercise.

    Each torsiondrive entry is written to scratch files inside a temporary
    ``tmp`` folder (created under the current working directory and removed
    again before returning), checked against ``forcefield`` with
    ``test_ff_mol2`` and screened with ``check_Hbond``. Entries that pass
    both checks are recorded under the id of the ``ProperTorsions``
    parameter matching their first scanned dihedral.

    Args:
        torsiondrive_data: mapping of entry label -> dict with the keys
            ``'initial_molecules'``, ``'final_molecules'``,
            ``'final_energies'``, ``'final_gradients'``, ``'attributes'``
            and ``'keywords'``.
        forcefield: object providing
            ``get_parameter_handler('ProperTorsions').parameters``; also
            passed to ``test_ff_mol2``.
        verbose: when truthy, print why an entry was excluded.

    Returns:
        A 2-tuple ``(tid_calculated_molecules_list,
        molecules_list_dict_from_td)``:

        * dict mapping every torsion parameter id of ``forcefield`` to a
          list of ``{'mol_index': ..., 'indices': ...}`` records;
        * dict mapping ``mol_index`` (the entry's
          ``canonical_isomeric_smiles`` attribute) to a list of reduced
          molecule dicts built from its initial molecules.
    """
    # one (initially empty) bucket per torsion id in the input forcefield
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    tid_calculated_molecules_list = {
        torsion_param.id: [] for torsion_param in ff_torsion_param_list
    }
    molecules_list_dict_from_td = {}

    start_dir = os.getcwd()
    if os.path.exists('tmp'):
        shutil.rmtree('tmp')
    os.mkdir('tmp')
    os.chdir('tmp')
    try:
        for td_data in torsiondrive_data.values():
            # pick a single initial molecule
            first_qcmol = td_data['initial_molecules'][0]

            # write input.mol2 file; close the shared stream even on failure
            qcjson_mol = first_qcmol.dict(encoding='json')
            oemol = cmiles.utils.load_molecule(qcjson_mol)
            ofs.open('input.mol2')
            try:
                oechem.OEWriteMolecule(ofs, oemol)
            finally:
                ofs.close()

            # test mol2 file against the forcefield
            success, _msg, molecule_labels = test_ff_mol2(
                forcefield, 'input.mol2')
            if not success:
                if verbose:
                    print(
                        'Error occured while testing input.mol2. Excluded in tid_calculated_molecules_list. '
                    )
                continue

            # write scan.xyz with the final geometries in sorted grid order
            fbmol = FBMolecule('input.mol2')
            sorted_grid_ids = sorted(td_data['final_molecules'].keys())
            target_mol = FBMolecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted_grid_ids:
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                target_mol.xyzs.append(grid_qc_mol.geometry * 0.529177)
                # add energy and gradient
                target_mol.qm_energies.append(
                    td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')

            # exclude scans where check_Hbond reports internal H bonds
            no_hbonds = check_Hbond(scan_fnm='scan.xyz', top_fnm='input.mol2')
            if not no_hbonds:
                if verbose:
                    print(
                        'Internal hydrogen bond detacted. Excluded in tid_calculated_molecules_list. '
                    )
                continue

            mol_index = td_data['attributes']["canonical_isomeric_smiles"]
            # only the first scanned dihedral decides the torsion id
            indices = td_data['keywords']['dihedrals'][0]
            tid = molecule_labels['ProperTorsions'][tuple(indices)].id
            tid_calculated_molecules_list[tid].append({
                'mol_index': mol_index,
                'indices': indices
            })

            # keep a reduced dict for every initial molecule
            qcschema_molecules = []
            for init_qcmol in td_data['initial_molecules']:
                j_dict = init_qcmol.dict(encoding='json')
                qcschema_molecules.append({
                    'symbols': j_dict['symbols'],
                    'geometry': j_dict['geometry'],
                    'connectivity': j_dict['connectivity'],
                    'molecular_charge': j_dict['molecular_charge'],
                    'molecular_multiplicity': j_dict['molecular_multiplicity']
                })
            molecules_list_dict_from_td[mol_index] = qcschema_molecules
    finally:
        # always leave the caller's cwd intact and drop the scratch folder,
        # even when an entry raised part-way through
        os.chdir(start_dir)
        shutil.rmtree('tmp')

    print("\n## Available torsion scans from QCArchive ##\n" + '-' * 90)
    print(f"{'idx':<7} {'tid':7s}  {'Number of torsion scans'}")
    for idx, (tid, molecules_list) in enumerate(
            tid_calculated_molecules_list.items()):
        if molecules_list:
            print(f'{idx:<7} {tid:7s}  {len(molecules_list)}')
    print('-' * 90)
    return tid_calculated_molecules_list, molecules_list_dict_from_td
예제 #5
0
def make_fb_targets():
    """Build one ForceBalance target folder per result folder.

    Reads the module-level settings ``results_folder``, ``molecules_folder``
    and ``out_folder`` (all defined outside this function). For every
    sub-folder of ``results_folder`` that contains at least one ``.xyz``
    scan, a target ``td_<folder name>`` is created under ``out_folder``
    holding ``qdata.txt``, ``scan.xyz``, ``conf.pdb``, ``input.mol2`` and
    ``notes.txt``. A ``targets.in`` file listing all targets (formatted
    with the module-level ``target_str`` template) is written into
    ``out_folder``.

    Any existing ``out_folder`` is deleted first.

    Scan files are processed in sorted name order so the frame order of the
    generated files does not depend on the arbitrary order returned by
    ``os.listdir``.
    """
    result_mol_folders = sorted(
        os.path.join(results_folder, f) for f in os.listdir(results_folder)
        if os.path.isdir(os.path.join(results_folder, f)))
    print(
        f"\nLoading data from {len(result_mol_folders)} result folders under {results_folder}"
    )
    # start from a clean output folder
    if os.path.exists(out_folder):
        shutil.rmtree(out_folder)
    os.mkdir(out_folder)
    target_names = []
    for mol_folder in result_mol_folders:
        mol_name = os.path.basename(mol_folder)
        # the name of the molecules should be consistent with the mol_folder
        mol_file = os.path.join(molecules_folder, mol_name + '.mol2')
        molecule = Molecule(mol_file)
        # find all torsion data; sorted for a reproducible frame order
        finished_scans = sorted(
            name for name, ext in map(os.path.splitext, os.listdir(mol_folder))
            if ext == '.xyz')
        if not finished_scans:
            print(f'No finished scans found in {mol_folder}')
            continue
        # each scan <name>.xyz is paired with a gradient file <name>.gradxyz
        scan_files = [(os.path.join(mol_folder, name + '.xyz'),
                       os.path.join(mol_folder, name + '.gradxyz'))
                      for name in finished_scans]
        # output target name
        target_name = 'td_' + mol_name
        target_names.append(target_name)
        # make target folder
        this_target_folder = os.path.join(out_folder, target_name)
        os.mkdir(this_target_folder)
        # read data from each finished scan
        target_mol = Molecule()
        target_mol.elem = molecule.elem
        target_mol.xyzs = []
        target_mol.qm_energies = []
        target_mol.qm_grads = []
        for xyz_file, grad_file in scan_files:
            m = Molecule(xyz_file)
            target_mol.xyzs += m.xyzs
            # the energy is the last whitespace-separated token of each
            # frame's comment line
            target_mol.qm_energies += [
                float(comm.split()[-1]) for comm in m.comms
            ]
            # read gradient
            target_mol.qm_grads += read_gradxyz(grad_file)
        # write qdata.txt
        target_mol.write(os.path.join(this_target_folder, 'qdata.txt'))
        # write scan.xyz
        target_mol.write(os.path.join(this_target_folder, 'scan.xyz'))
        # write pdb
        molecule.write(os.path.join(this_target_folder, 'conf.pdb'))
        # copy mol2 file
        shutil.copyfile(mol_file, os.path.join(this_target_folder,
                                               'input.mol2'))
        # write a note recording which input files the target came from
        with open(os.path.join(this_target_folder, 'notes.txt'), 'w') as fnote:
            fnote.write(
                "Notes: This target is made by make_fb_targets.py, using data from\n"
            )
            fnote.write(mol_file + '\n')
            for xyz_file, grad_file in scan_files:
                fnote.write(xyz_file + '\n')
                fnote.write(grad_file + '\n')
    # write a target.in file for use in ForceBalance input
    with open(os.path.join(out_folder, 'targets.in'), 'w') as fout:
        for tname in target_names:
            fout.write(target_str.format(name=tname) + '\n')
    print("Targets generation finished!")
    print(
        f"You can copy contents in {os.path.join(out_folder, 'targets.in')} to your ForceBalance input file."
    )
예제 #6
0
def _reject_torsiondrive_target(target_name, dataset_name, entry_index,
                                td_data, msg):
    """Archive a rejected target's mol2 and an error note, then delete its folder.

    Must be called with the target folder as the working directory. On
    return the working directory is the parent folder and the target
    folder has been removed.
    """
    os.makedirs('../error_mol2s', exist_ok=True)
    shutil.move('input.mol2', f'../error_mol2s/{target_name}.mol2')
    with open(f'../error_mol2s/{target_name}_error.txt', 'w') as notefile:
        notefile.write(f'{dataset_name}\ntarget_name {target_name}\n')
        notefile.write(
            f'entry {entry_index}\ntd_keywords {td_data["keywords"]}\n')
        notefile.write(f'error message:\n{msg}')
    # remove this folder
    os.chdir('..')
    shutil.rmtree(target_name)


def make_torsiondrive_target(dataset_name, torsiondrive_data, test_ff=None):
    """
    Make a folder of ForceBalance targets from the torsiondrive data

    A fresh ``targets`` folder is created in the current working directory
    (any existing one is deleted). Each entry of ``torsiondrive_data``
    becomes one target folder containing ``note.txt``, ``input.mol2``,
    ``conf.pdb``, ``scan.xyz``, ``qdata.txt`` and ``metadata.json``.
    Entries that fail the optional forcefield test, or for which
    ``screening_Hbond`` reports internal H bonds, are removed again and
    their mol2 plus an error note are stored under ``targets/error_mol2s``.
    Finally ``targets/targets.<dataset_name>.in`` is written from the
    module-level ``target_in_str`` template.

    Args:
        dataset_name: dataset label; spaces are replaced by underscores in
            folder and file names.
        torsiondrive_data: mapping of entry label -> dict with the keys
            ``'initial_molecules'``, ``'final_molecules'``,
            ``'final_energies'``, ``'final_gradients'``, ``'attributes'``
            and ``'keywords'``.
        test_ff: optional forcefield passed to ``test_ff_mol2``; when given,
            the SMIRKS pattern and id of every scanned dihedral are added
            to the metadata.

    The working directory is restored to where the call started, also when
    an error interrupts processing.
    """
    target_name_prefix = 'td_' + dataset_name.replace(' ', '_')
    start_dir = os.getcwd()
    # create new targets folder
    if os.path.exists('targets'):
        shutil.rmtree('targets')
    os.mkdir('targets')
    os.chdir('targets')
    try:
        # write each entry as an individual target
        n_targets = len(torsiondrive_data)
        idx_fmt_string = get_int_fmt_string(n_targets)
        target_names = []
        print(f"Generating {n_targets} targets")
        # target_idx counts every entry, including ones rejected below
        for target_idx, (entry_index, td_data) in enumerate(
                torsiondrive_data.items()):
            # pick a single initial molecule
            qcmol = td_data['initial_molecules'][0]
            mol_formula = qcmol.get_molecular_formula()
            # create target folder
            target_idx_str = idx_fmt_string.format(target_idx)
            target_name = f"{target_name_prefix}_{target_idx_str}_{mol_formula}"
            print(f"{target_idx}: {target_name}")
            os.mkdir(target_name)
            os.chdir(target_name)
            # write a note
            with open('note.txt', 'w') as notefile:
                notefile.write(
                    f'Target generated from dataset {dataset_name}, entry {entry_index}'
                )
            # write input.mol2 file; close the shared stream even on failure
            qcjson_mol = qcmol.dict(encoding='json')
            oemol = cmiles.utils.load_molecule(qcjson_mol)
            ofs.open('input.mol2')
            try:
                oechem.OEWriteMolecule(ofs, oemol)
            finally:
                ofs.close()
            # test mol2 file (only when a forcefield was supplied)
            success, msg, molecule_labels = True, None, None
            if test_ff is not None:
                success, msg, molecule_labels = test_ff_mol2(
                    test_ff, 'input.mol2')
            if not success:
                _reject_torsiondrive_target(target_name, dataset_name,
                                            entry_index, td_data, msg)
                continue
            # write conf.pdb file
            fbmol = Molecule('input.mol2')
            fbmol.write('conf.pdb')
            # list of grid ids sorted
            sorted_grid_ids = sorted(td_data['final_molecules'].keys())
            # write scan.xyz and qdata.txt files
            target_mol = Molecule()
            target_mol.elem = fbmol.elem
            target_mol.xyzs = []
            target_mol.qm_energies = []
            target_mol.qm_grads = []
            for grid_id in sorted_grid_ids:
                grid_qc_mol = td_data['final_molecules'][grid_id]
                # convert geometry unit Bohr -> Angstrom
                target_mol.xyzs.append(grid_qc_mol.geometry * 0.529177)
                # add energy and gradient
                target_mol.qm_energies.append(
                    td_data['final_energies'][grid_id])
                target_mol.qm_grads.append(td_data['final_gradients'][grid_id])
            target_mol.write('scan.xyz')
            target_mol.write('qdata.txt')
            # check if the torsion scan contains one or more conformers
            # forming strong internal H bonds
            no_hbonds, _hbonds = screening_Hbond(mol2_fnm='input.mol2',
                                                 scan_fnm='scan.xyz')
            if not no_hbonds:
                _reject_torsiondrive_target(
                    target_name, dataset_name, entry_index, td_data,
                    'One or more internal H bonds exist.')
                continue
            # pick metadata to write into the metadata.json file
            metadata = copy.deepcopy(td_data['keywords'])
            metadata['dataset_name'] = dataset_name
            metadata['entry_label'] = entry_index
            metadata['canonical_smiles'] = td_data['attributes'].get(
                'canonical_smiles', 'unknown')
            metadata['torsion_grid_ids'] = sorted_grid_ids
            # find SMIRKS for the torsions being scanned if test_ff is
            # provided (same condition under which molecule_labels was set)
            if test_ff is not None:
                metadata['smirks'] = []
                metadata['smirks_ids'] = []
                for torsion_indices in td_data['keywords']['dihedrals']:
                    param = molecule_labels['ProperTorsions'][tuple(
                        torsion_indices)]
                    metadata['smirks'].append(param.smirks)
                    metadata['smirks_ids'].append(param.id)
            with open('metadata.json', 'w') as jsonfile:
                json.dump(metadata, jsonfile, indent=2)
            # finish this target
            target_names.append(target_name)
            os.chdir('..')

        # write targets.{dataset_name}.in file (inside the targets folder)
        target_in_fnm = f"targets.{dataset_name.replace(' ', '_')}.in"
        with open(target_in_fnm, 'w') as outfile:
            for target_name in target_names:
                outfile.write(target_in_str.format(name=target_name))
        print(f"Successfull generated {len(target_names)} targets.")
        print(
            f"You can copy contents in {target_in_fnm} to your ForceBalance input file."
        )
    finally:
        # return to the caller's directory even if an entry raised while the
        # cwd was inside targets/ or one of its target folders
        os.chdir(start_dir)