Esempio n. 1
0
def mset_to_smiles(max_atoms, filename):
    """Compute a SMILES string for the set stored in *filename* and save it.

    The set is loaded from *filename*, the geometry exposed as
    ``mset.get_min_geom`` is converted to a molecule with ``xyz2mol``, and the
    non-isomeric SMILES produced by ``Chem`` is written to
    ``mset.identifiers["smiles"]`` before the set is saved back to *filename*.

    Args:
        max_atoms: Sets whose ``n_atoms`` exceeds this value are skipped.
        filename: Path of the molecule-set file to read and overwrite.

    Returns:
        ``(filename, smiles)`` on success.  ``(None, None)`` when the file is
        not valid JSON, the set has more than *max_atoms* atoms, or
        ``xyz2mol`` raises ``Chem.AtomValenceException``.
    """
    skipped = (None, None)

    mset = MSet()
    try:
        mset.load(filename)
    except json.JSONDecodeError:
        print("json decode error")
        return skipped
    if mset.n_atoms > max_atoms:
        return skipped

    geom = mset.get_min_geom

    def atom_charges(label):
        # Per-atom charge stored under *label*; KeyError if any atom lacks it.
        return [float(atom.labels[label]) for atom in geom.atoms]

    # Prefer the first label; fall back to the second when it is missing.
    try:
        charges = atom_charges('wB97X-D.6-311g**.charges')
    except KeyError:
        charges = atom_charges('wb97x-d.6-311gss.mulliken_charges')
    # The value handed to xyz2mol is the mean of the per-atom charges.
    charge = sum(charges) / len(charges)

    at_nums = geom.at_nums
    positions = [atom.xyz for atom in geom.atoms]
    try:
        mol = xyz2mol(at_nums, positions, charge)
    except Chem.AtomValenceException:
        print("atom valence exception")
        return skipped

    # Round-trip through SMILES once more before storing the final string.
    first_pass = Chem.MolToSmiles(mol, isomericSmiles=False)
    reparsed = Chem.MolFromSmiles(first_pass)
    smiles = Chem.MolToSmiles(reparsed, isomericSmiles=False)

    mset.identifiers.update({"smiles": smiles})
    mset.save(filename)
    return filename, smiles
Esempio n. 2
0
def mset_to_smiles(filename):
    """Attach a SMILES identifier to the molecule set stored in *filename*.

    The set is loaded, ``get_min_geom`` picks a geometry from
    ``mset.geometries``, and ``geom_to_smiles`` is called with a charge of 0.
    The result is stored in ``mset.identifiers["smiles"]`` and the set is
    saved back to *filename*.

    Args:
        filename: Path of the molecule-set file to read and overwrite.

    Returns:
        *filename* on success; ``None`` when the file is not valid JSON or the
        SMILES conversion raises.
    """
    mset = MSet()
    try:
        mset.load(filename)
    except json.JSONDecodeError:
        print("json decode error")
        return None
    geom = get_min_geom(mset.geometries)
    charge = 0
    try:
        smiles = geom_to_smiles(geom, charge)
    except Exception:
        # Conversion is best-effort, but KeyboardInterrupt/SystemExit must
        # still propagate, so do not use a bare ``except``.
        return None
    mset.identifiers.update({"smiles": smiles})
    mset.save(filename)
    return filename
Esempio n. 3
0
def mset_to_smiles(filename):
    """Attach a SMILES identifier to the molecule set stored in *filename*.

    The charge passed to ``geom_to_smiles`` is the average of two means: the
    mean of the per-atom ``wb97x_dz.cm5_charges`` labels and the mean of the
    per-atom ``wb97x_dz.hirshfeld_charges`` labels.  If those labels cannot be
    read, the charge falls back to 0.

    Args:
        filename: Path of the molecule-set file to read and overwrite.

    Returns:
        *filename* on success; ``None`` when the SMILES conversion raises.
    """
    mset = MSet()
    mset.load(filename)
    geom = mset.get_min_geom
    try:
        cm5_charges = [
            float(atom.labels['wb97x_dz.cm5_charges']) for atom in geom.atoms
        ]
        hirshfeld_charges = [
            float(atom.labels['wb97x_dz.hirshfeld_charges'])
            for atom in geom.atoms
        ]
        charge = ((sum(cm5_charges) / len(cm5_charges)) +
                  (sum(hirshfeld_charges) / len(hirshfeld_charges))) / 2
    except Exception:
        # Missing or malformed labels, or an empty atom list: fall back to a
        # zero charge.  ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate.
        charge = 0
    try:
        smiles = geom_to_smiles(geom, charge)
    except Exception:
        # Best-effort conversion; see the note on the handler above.
        return None
    mset.identifiers.update({"smiles": smiles})
    mset.save(filename)
    return filename
Esempio n. 4
0
def load_ani1x(path_to_h5file, data_keys=None,
               output_dir="/mnt/sdb1/adriscoll/ani1x-data/ani1x-msets"):
    """Convert every data bucket of an HDF5 file into a saved MoleculeSet.

    For each bucket yielded by ``iter_data_buckets`` a ``MoleculeSet`` is
    built from the bucket's ``atomic_numbers``.  Only the last entry (index
    ``-1``) of ``coordinates`` and of each property array is used.  Keys
    containing "energy" or "dipole" become molecule-level labels, keys
    containing "force" or "charge" become atom-level labels, and any other
    key is ignored.  The resulting geometry is stored as the single element
    of the ``'ani.data'`` trajectory and the set is saved.

    Args:
        path_to_h5file: Path of the HDF5 file to read.
        data_keys: Keys forwarded to ``iter_data_buckets``.  ``None`` (the
            default) forwards an empty list, as the previous ``[]`` default
            did.
        output_dir: Directory in which ``ani1x-mol<i>.mset`` files are
            written.  Defaults to the previously hard-coded location.

    Returns:
        None.
    """
    # A ``None`` sentinel avoids sharing one mutable default across calls.
    keys = [] if data_keys is None else data_keys
    for i, data in enumerate(iter_data_buckets(path_to_h5file, keys=keys)):
        atoms = [Atom(at_num) for at_num in data['atomic_numbers'].tolist()]
        mset = MoleculeSet(atoms)
        mset.filename = "{}/ani1x-mol{}.mset".format(output_dir, i)

        mol_keys, atom_keys = [], []
        for key in data.keys():
            if key == 'atomic_numbers' or key == 'coordinates':
                # Structural arrays are handled separately, not as labels.
                continue
            if 'energy' in key or 'dipole' in key:
                mol_keys.append(key)
            elif 'force' in key or 'charge' in key:
                atom_keys.append(key)

        mol_labels = {key: data[key][-1].tolist() for key in mol_keys}
        atom_labels = {key: data[key][-1].tolist() for key in atom_keys}
        geometry = mset.new_geometry(data['coordinates'][-1].tolist(),
                                     mol_labels, atom_labels)
        mset.trajectories['ani.data'] = [geometry]
        mset.save()
Esempio n. 5
0
            opt_natoms[len(opt_mol_data['atoms'])].append(opt_mol)
        else:
            opt_natoms[len(opt_mol_data['atoms'])] = [opt_mol]
    with open(
            "/mnt/sdb1/jeherr/chemspider_data/chno_msets/chno_opt_natoms.txt",
            "w") as f:
        json.dump(opt_natoms, f)

# Result tables keyed by molecule-set filename.
opt_matches = {}
meta_matches = {}
# Compare sets only within the same atom-count bucket.
# NOTE(review): opt_natoms[n_atoms] raises KeyError when an atom count present
# in meta_natoms is absent from opt_natoms -- confirm that cannot happen.
for n_atoms, meta_mols in meta_natoms.items():
    opt_mols = opt_natoms[n_atoms]
    opt_msets = []
    meta_msets = []
    # Load every "opt" set of this bucket and remember where it came from.
    for opt_mol in opt_mols:
        opt_mset = MoleculeSet()
        opt_mset.load(opt_mol)
        opt_mset.filename = opt_mol
        opt_msets.append(opt_mset)
    # Load every "meta" set of this bucket the same way.
    for meta_mol in meta_mols:
        meta_mset = MoleculeSet()
        meta_mset.load(meta_mol)
        meta_mset.filename = meta_mol
        meta_msets.append(meta_mset)
    # For each "meta" set, record the filenames of all "opt" sets for which
    # compare_hash reports a match (an empty list when there is none).
    for meta_mset in meta_msets:
        matches = []
        for opt_mset in opt_msets:
            if meta_mset.compare_hash(opt_mset):
                matches.append(opt_mset.filename)
        meta_matches[meta_mset.filename] = matches
    for opt_mset in opt_msets:
Esempio n. 6
0
            else:
                opt_natoms[len(opt_mol_data['atoms'])] = [opt_mol]
    with open(
            "/mnt/sdb1/adriscoll/chemspider_data/expanded_msets/opt_smiles_natoms.txt",
            "w") as file:
        json.dump(opt_natoms, file)

# Result tables keyed by molecule-set filename.
opt_matches = {}
meta_matches = {}
for n_atoms, meta_mols in meta_natoms.items():
    # Atom counts with no "opt" sets have nothing to be compared against.
    if n_atoms not in opt_natoms:
        continue
    opt_mols = opt_natoms[n_atoms]

    # Load both collections for this atom count, tagging each set with the
    # path it was read from.
    opt_msets = []
    for opt_mol in opt_mols:
        opt_mset = MoleculeSet()
        opt_mset.load(opt_mol)
        opt_mset.filename = opt_mol
        opt_msets.append(opt_mset)
    meta_msets = []
    for meta_mol in meta_mols:
        meta_mset = MoleculeSet()
        meta_mset.load(meta_mol)
        meta_mset.filename = meta_mol
        meta_msets.append(meta_mset)

    # A "meta" set matches every "opt" set carrying the same SMILES string.
    for meta_mset in meta_msets:
        meta_matches[meta_mset.filename] = [
            opt_mset.filename
            for opt_mset in opt_msets
            if opt_mset.identifiers['smiles'] == meta_mset.identifiers['smiles']
        ]
Esempio n. 7
0
def read_opt_data(filename):
    """Parse a geometry-optimization output file into a ``MoleculeSet``.

    The file is scanned line by line.  Recognised section headers hand the
    file iterator to the matching ``parse_*`` helper and the parsed values are
    accumulated in per-property lists.  On every "Optimization Cycle" line
    whose property lists all have the same length, cycle "1" (re)creates the
    set and an empty trajectory, and a geometry is appended via
    ``build_new_geom`` whenever more energies than geometries are available.

    Args:
        filename: Path of the output file to read.

    Returns:
        The ``MoleculeSet`` with the trajectory stored under
        ``"<method>.<basis>.opt"``, returned at "OPTIMIZATION CONVERGED" or at
        end of file.  ``None`` when a "Convergence failure" line is found,
        ``parse_atoms_coords`` returns no atomic numbers, the set cannot be
        constructed, or no set had been built by the time the file converged
        or ended.
    """
    n_atoms = None
    mset = None
    # Stays None until cycle 1 builds the set, so later branches can test for
    # it instead of tripping over an unbound local.
    opt_trajectory = None
    atomic_nums = []
    coords = []
    energies = []
    forces = []
    dipoles = []
    quadrupoles = []
    charges = []
    print("Reading ", filename)
    with open(filename, "r") as f:
        try:
            while True:
                # next() rather than a for-loop: the parse_* helpers receive
                # the same iterator, and running off the end of the file is
                # handled by the StopIteration handler below.
                line = next(f)
                if "User input:" in line:
                    n_atoms, at_sym, method, basis = parse_sp_input(f)
                elif "Standard Nuclear Orientation" in line:
                    atom_nums, coord = parse_atoms_coords(f, n_atoms)
                    if atom_nums is None:
                        return None
                    atomic_nums.append(atom_nums)
                    coords.append(coord)
                # energy
                elif "Convergence failure" in line:
                    return None
                elif "Cycle" in line and "Energy" in line:
                    energies.append(parse_energy(f))
                # forces
                elif "Gradient of SCF Energy" in line:
                    force = parse_forces(f, n_atoms)
                    if force is not None:
                        forces.append(force)
                    else:
                        print(filename, " contains unparsed forces")
                # dipoles
                elif "Dipole Moment (Debye)" in line:
                    dipoles.append(parse_dipole(f))
                # quadrupoles
                elif "Quadrupole Moments (Debye-Ang)" in line:
                    quadrupoles.append(parse_quadrupole(f))
                # charges
                elif "Ground-State Mulliken Net Atomic Charges" in line:
                    charges.append(parse_charges(f, n_atoms))
                elif "Optimization Cycle" in line:
                    series = (atomic_nums, energies, forces, coords, dipoles,
                              quadrupoles, charges)
                    if len({len(values) for values in series}) != 1:
                        # Incomplete record for this cycle; wait for the next.
                        continue
                    if line.split()[-1] == "1":
                        try:
                            atoms = [
                                Atom(at_num) for at_num in atomic_nums[0]
                            ]
                            mset = MoleculeSet(atoms)
                            opt_trajectory = []
                        except Exception:
                            print("Error making MSet for ", filename)
                            return None
                    # A later cycle can arrive before any set exists (cycle 1
                    # was skipped above); there is nothing to append to then.
                    if (opt_trajectory is not None
                            and len(energies) > len(opt_trajectory)):
                        opt_trajectory.append(
                            build_new_geom(atomic_nums, coords, energies,
                                           forces, dipoles, quadrupoles,
                                           charges, method, basis))
                elif "OPTIMIZATION CONVERGED" in line:
                    if mset is None:
                        print("No MSet built for ", filename)
                        return None
                    mset.trajectories[".".join(
                        (method, basis, "opt"))] = opt_trajectory
                    return mset
        except StopIteration:
            print("Hit EOF on ", filename)
            if mset is None:
                print("No MSet built for ", filename)
                return None
            mset.trajectories[".".join(
                (method, basis, "opt"))] = opt_trajectory
            return mset
Esempio n. 8
0
def _sp_same_length(*series):
    """Return True when every given list has the same length."""
    return len({len(values) for values in series}) == 1


def _sp_trim_to_shortest(*series):
    """Return copies of the given lists cut to the length of the shortest."""
    shortest = min(len(values) for values in series)
    return tuple(values[:shortest] for values in series)


def read_multi_sp_data(filenames):
    """Parse several single-point output files into one ``MoleculeSet``.

    Each file is scanned line by line; recognised section headers hand the
    file iterator to the matching ``parse_*`` helper and the parsed values
    are accumulated in per-property lists shared across all files.  At every
    "Thank you very much for using Q-Chem." line, and again when a file ends
    or cannot be decoded, the lists are reconciled: if they all have the same
    length and hold more energies than stored geometries, a geometry is
    appended via ``build_new_geom``; otherwise they are trimmed to the
    shortest list so a partial record is discarded.

    Args:
        filenames: Sequence of output-file paths, read in order.

    Returns:
        The ``MoleculeSet`` with the trajectory stored under
        ``"<method>.<basis>.meta"``.  ``None`` when *filenames* is empty, the
        set cannot be constructed from the first parsed atomic numbers, or no
        file produced a set.
    """
    if not filenames:
        # The final diagnostic indexes filenames[0]; an empty input used to
        # fail there with IndexError.
        print("No geometries collected: no input files given")
        return None

    n_atoms = None
    mset = None
    atomic_nums = []
    coords = []
    energies = []
    forces = []
    dipoles = []
    quadrupoles = []
    charges = []

    for filename in filenames:
        with open(filename, "r") as f:
            try:
                while True:
                    # next() rather than a for-loop: the parse_* helpers
                    # receive the same iterator, and end of file is handled
                    # by the exception handler below.
                    line = next(f)
                    if "User input:" in line:
                        n_atoms, at_sym, method, basis = parse_sp_input(f)
                    elif "Standard Nuclear Orientation" in line:
                        atom_nums, coord = parse_atoms_coords(f, n_atoms)
                        atomic_nums.append(atom_nums)
                        coords.append(coord)
                    # energy
                    elif "Cycle" in line and "Energy" in line:
                        energies.append(parse_energy(f))
                    # forces
                    elif "Gradient of SCF Energy" in line:
                        force = parse_forces(f, n_atoms)
                        if force is not None:
                            forces.append(force)
                        else:
                            print(filename, " contains unparsed forces")
                    # dipoles
                    elif "Dipole Moment (Debye)" in line:
                        dipoles.append(parse_dipole(f))
                    # quadrupoles
                    elif "Quadrupole Moments (Debye-Ang)" in line:
                        quadrupoles.append(parse_quadrupole(f))
                    # charges
                    elif "Ground-State Mulliken Net Atomic Charges" in line:
                        charges.append(parse_charges(f, n_atoms))
                    elif "Thank you very much for using Q-Chem." in line:
                        if mset is None:
                            print("Attempting to make initial MSet for ",
                                  filename)
                            try:
                                atoms = [
                                    Atom(at_num) for at_num in atomic_nums[0]
                                ]
                                mset = MoleculeSet(atoms)
                                meta_trajectory = []
                            except Exception:
                                print("Error making MSet for ", filename)
                                return None
                        series = (atomic_nums, coords, energies, forces,
                                  dipoles, quadrupoles, charges)
                        if _sp_same_length(*series):
                            if len(energies) > len(meta_trajectory):
                                meta_trajectory.append(
                                    build_new_geom(atomic_nums, coords,
                                                   energies, forces, dipoles,
                                                   quadrupoles, charges,
                                                   method, basis))
                        else:
                            # Drop the trailing partial record so the lists
                            # line up again for the next job.
                            (atomic_nums, coords, energies, forces, dipoles,
                             quadrupoles,
                             charges) = _sp_trim_to_shortest(*series)
            except (StopIteration, UnicodeDecodeError):
                # End of file, or undecodable bytes: reconcile what was read
                # and move on to the next file.
                if mset is None:
                    continue
                series = (atomic_nums, coords, energies, forces, dipoles,
                          quadrupoles, charges)
                if _sp_same_length(*series):
                    if len(energies) > len(meta_trajectory):
                        meta_trajectory.append(
                            build_new_geom(atomic_nums, coords, energies,
                                           forces, dipoles, quadrupoles,
                                           charges, method, basis))
                else:
                    (atomic_nums, coords, energies, forces, dipoles,
                     quadrupoles, charges) = _sp_trim_to_shortest(*series)
    if mset is None:
        print("No geometries collected for ", filenames[0])
        return None
    mset.trajectories[".".join((method, basis, "meta"))] = meta_trajectory
    return mset
Esempio n. 9
0
def _aimd_collect_property(line, f, n_atoms, atomic_nums, coords, energies,
                           forces, dipoles, quadrupoles, charges):
    """Parse the section announced by *line*, if any, into the given lists.

    Returns True when *line* was a recognised property header (the matching
    ``parse_*`` helper was called on *f* and its result appended), False
    otherwise.
    """
    if "Standard Nuclear Orientation" in line:
        atom_nums, coord = parse_atoms_coords(f, n_atoms)
        atomic_nums.append(atom_nums)
        coords.append(coord)
    elif "Cycle" in line and "Energy" in line:
        energies.append(parse_energy(f))
    elif "Gradient of SCF Energy" in line:
        forces.append(parse_forces(f, n_atoms))
    elif "Dipole Moment (Debye)" in line:
        dipoles.append(parse_dipole(f))
    elif "Quadrupole Moments (Debye-Ang)" in line:
        quadrupoles.append(parse_quadrupole(f))
    elif "Ground-State Mulliken Net Atomic Charges" in line:
        charges.append(parse_charges(f, n_atoms))
    else:
        return False
    return True


def read_aimd_data(filename):
    """Parse an AIMD output file into a ``MoleculeSet``.

    Properties found before the "AB INITIO MOLECULAR DYNAMICS" line form the
    first geometry of the trajectory.  After that line, every "TIME STEP"
    line appends a geometry via ``build_new_geom`` provided all property
    lists have the same length; on a mismatch the trajectory collected so far
    is stored and returned.  "TIME STEPS COMPLETED" appends a final geometry
    when one is pending, then stores and returns the trajectory.

    Args:
        filename: Path of the output file to read.

    Returns:
        The ``MoleculeSet`` with the trajectory stored under
        ``"<method>.<basis>.aimd"``, or ``None`` when the file ends before
        the AIMD section has started.
    """
    # Initialised up front, like the sibling readers, so every branch can
    # test state explicitly instead of relying on unbound locals.
    n_atoms = None
    mset = None
    atomic_nums = []
    coords = []
    energies = []
    forces = []
    dipoles = []
    quadrupoles = []
    charges = []
    series = (atomic_nums, coords, energies, forces, dipoles, quadrupoles,
              charges)

    def lists_in_step():
        # True when every property list holds the same number of entries.
        return len({len(values) for values in series}) == 1

    print("Reading ", filename)
    with open(filename, "r") as f:
        try:
            while True:
                # next() rather than a for-loop: the parse_* helpers receive
                # the same iterator, and end of file is handled by the
                # StopIteration handler below.
                line = next(f)
                if "User input:" in line:
                    (n_atoms, at_sym, method, basis, time_steps, aimd_steps,
                     aimd_temp) = parse_aimd_input(f)
                    continue
                # These sections **SHOULD** only occur once before the AIMD
                # part starts.
                if _aimd_collect_property(line, f, n_atoms, *series):
                    continue
                if "AB INITIO MOLECULAR DYNAMICS" not in line:
                    continue

                atoms = [Atom(at_num) for at_num in atomic_nums[0]]
                mset = MoleculeSet(atoms)
                aimd_trajectory = [
                    build_new_geom(atomic_nums, coords, energies, forces,
                                   dipoles, quadrupoles, charges, method,
                                   basis)
                ]
                while True:
                    line = next(f)
                    if "TIME STEPS COMPLETED" in line:
                        if lists_in_step():
                            if len(energies) > len(aimd_trajectory):
                                aimd_trajectory.append(
                                    build_new_geom(atomic_nums, coords,
                                                   energies, forces, dipoles,
                                                   quadrupoles, charges,
                                                   method, basis))
                        else:
                            print(
                                "Error reading AIMD trajectory at last step",
                                filename,
                                "Returning MSet before this step")
                        mset.trajectories[".".join(
                            (method, basis, "aimd"))] = aimd_trajectory
                        return mset
                    elif "TIME STEP" in line:
                        # Only the step number is needed, for the diagnostic
                        # below; the time columns are not parsed.
                        time_step = int(line.split()[2].lstrip("#"))
                        if lists_in_step():
                            aimd_trajectory.append(
                                build_new_geom(atomic_nums, coords, energies,
                                               forces, dipoles, quadrupoles,
                                               charges, method, basis))
                        else:
                            print("Error reading AIMD trajectory at step",
                                  time_step, filename,
                                  "Returning MSet before "
                                  "this step")
                            mset.trajectories[".".join(
                                (method, basis, "aimd"))] = aimd_trajectory
                            return mset
                    else:
                        _aimd_collect_property(line, f, n_atoms, *series)
        except StopIteration:
            print("Hit EOF on ", filename)
            if mset is None:
                print("No MSet built for ", filename)
                return None
            mset.trajectories[".".join(
                (method, basis, "aimd"))] = aimd_trajectory
            return mset