Example #1
0
    def __init__(
        self,
        images,
        descriptor_setup,
        forcetraining=True,
        save_fps=True,
        cores=1,
    ):
        """Build a dataset of fingerprinted atomic images.

        Args:
            images: Iterable of ASE-compatible atoms objects to featurize.
            descriptor_setup: Tuple of
                ``(fp_scheme, fp_params, cutoff_params, elements)`` selecting
                and configuring the fingerprinting descriptor.
            forcetraining: If True, also compute forces and fingerprint
                primes so the model can be trained on forces.
            save_fps: Whether ``AtomsToData`` should save fingerprints.
            cores: Number of worker processes used during conversion.

        Raises:
            NotImplementedError: If ``fp_scheme`` is not a supported scheme.
        """
        self.images = images
        self.forcetraining = forcetraining
        fp_scheme, fp_params, cutoff_params, elements = descriptor_setup
        if fp_scheme == "gaussian":
            self.descriptor = Gaussian(Gs=fp_params,
                                       elements=elements,
                                       **cutoff_params)
        elif fp_scheme == "mcsh":
            self.descriptor = AtomisticMCSH(MCSHs=fp_params, elements=elements)
        else:
            # Name the offending scheme so misconfiguration is easy to spot.
            raise NotImplementedError(
                "Unsupported fingerprinting scheme: {}".format(fp_scheme)
            )

        self.a2d = AtomsToData(
            descriptor=self.descriptor,
            r_energy=True,
            r_forces=self.forcetraining,
            save_fps=save_fps,
            fprimes=forcetraining,
            cores=cores,
        )

        self.data_list = self.process()
Example #2
0
    def predict(self, images, batch_size=32):
        """Run inference on *images* and return denormalized predictions.

        Produces ``{"energy": [...], "forces": [...]}`` where energies are
        collected into one flat list and one forces array is appended per
        image. Images are processed one at a time.
        """
        if len(images) == 0:
            warnings.warn("No images found!", stacklevel=2)
            return images

        # Reuse the training descriptor; energies/forces are predicted, not read.
        converter = AtomsToData(
            descriptor=self.train_dataset.descriptor,
            r_energy=False,
            r_forces=False,
            save_fps=True,
            fprimes=self.forcetraining,
            cores=1,
        )
        data_list = converter.convert_all(images, disable_tqdm=True)

        self.net.module.eval()
        collate = DataCollater(train=False,
                               forcetraining=self.forcetraining)

        predictions = {"energy": [], "forces": []}
        for sample in data_list:
            batch = collate([sample])
            energy, forces = self.net.module(batch)

            denormed_energy = self.target_scaler.denorm(energy, pred="energy")
            denormed_forces = self.target_scaler.denorm(forces, pred="forces")

            predictions["energy"].extend(denormed_energy.detach().tolist())
            predictions["forces"].append(denormed_forces.detach().numpy())

        return predictions
Example #3
0
    def __init__(
        self,
        images,
        descriptor_setup,
        forcetraining=True,
        save_fps=True,
        scaling=None,
        cores=1,
        process=True,
    ):
        """Build a dataset of fingerprinted atomic images.

        Args:
            images: Iterable of ASE-compatible atoms objects to featurize.
            descriptor_setup: Descriptor configuration passed to
                ``construct_descriptor``.
            forcetraining: If True, also compute forces and fingerprint
                primes so the model can be trained on forces.
            save_fps: Whether ``AtomsToData`` should save fingerprints.
            scaling: Feature-scaling spec; defaults to
                ``{"type": "normalize", "range": (0, 1)}``.
            cores: Number of worker processes used during conversion.
            process: If False, defer conversion; ``data_list`` stays None
                until ``process()`` is called explicitly.
        """
        self.images = images
        self.forcetraining = forcetraining
        # A dict default argument would be shared across all calls; build the
        # default per instance instead (mutable-default-argument pitfall).
        self.scaling = (
            {"type": "normalize", "range": (0, 1)} if scaling is None else scaling
        )
        self.descriptor = construct_descriptor(descriptor_setup)

        self.a2d = AtomsToData(
            descriptor=self.descriptor,
            r_energy=True,
            r_forces=self.forcetraining,
            save_fps=save_fps,
            fprimes=forcetraining,
            cores=cores,
        )

        self.data_list = self.process() if process else None
class AtomsDataset(Dataset):
    """Torch-style dataset of fingerprinted atomic images.

    Each image is converted to a data object via ``AtomsToData``; features
    and targets are normalized in place during ``process``.
    """

    def __init__(
        self,
        images,
        descriptor_setup,
        forcetraining=True,
        save_fps=True,
        scaling=None,
        cores=1,
    ):
        """Build the dataset.

        Args:
            images: Iterable of ASE-compatible atoms objects to featurize.
            descriptor_setup: Tuple of
                ``(fp_scheme, fp_params, cutoff_params, elements)``.
            forcetraining: If True, also compute forces and fingerprint
                primes for force training.
            save_fps: Whether ``AtomsToData`` should save fingerprints.
            scaling: Feature-scaling spec; defaults to
                ``{"type": "normalize", "range": (0, 1)}``.
            cores: Number of worker processes used during conversion.

        Raises:
            NotImplementedError: If ``fp_scheme`` is not a supported scheme.
        """
        self.images = images
        self.forcetraining = forcetraining
        # A dict default argument would be shared across all instances; build
        # the default per call instead (mutable-default-argument pitfall).
        self.scaling = (
            {"type": "normalize", "range": (0, 1)} if scaling is None else scaling
        )
        fp_scheme, fp_params, cutoff_params, elements = descriptor_setup
        if fp_scheme == "gaussian":
            self.descriptor = Gaussian(Gs=fp_params,
                                       elements=elements,
                                       **cutoff_params)
        elif fp_scheme == "mcsh":
            self.descriptor = AtomisticMCSH(MCSHs=fp_params, elements=elements)
        else:
            # Name the offending scheme so misconfiguration is easy to spot.
            raise NotImplementedError(
                "Unsupported fingerprinting scheme: {}".format(fp_scheme)
            )

        self.a2d = AtomsToData(
            descriptor=self.descriptor,
            r_energy=True,
            r_forces=self.forcetraining,
            save_fps=save_fps,
            fprimes=forcetraining,
            cores=cores,
        )

        self.data_list = self.process()

    def process(self):
        """Convert all images, then fit and apply feature/target scalers."""
        data_list = self.a2d.convert_all(self.images)

        self.feature_scaler = FeatureScaler(data_list, self.forcetraining,
                                            self.scaling)
        self.target_scaler = TargetScaler(data_list, self.forcetraining)
        # Both scalers normalize the data objects in place.
        self.feature_scaler.norm(data_list)
        self.target_scaler.norm(data_list)

        return data_list

    @property
    def input_dim(self):
        """Number of fingerprint features per atom (second fingerprint dim)."""
        return self.data_list[0].fingerprint.shape[1]

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, index):
        return self.data_list[index]
Example #5
0
    def predict(self, images, disable_tqdm=True):
        """Run inference on *images* and return denormalized predictions.

        Produces ``{"energy": [...], "forces": [...]}`` where energies are
        collected into one flat list and one numpy forces array is appended
        per image. Batches are moved to ``self.device`` and results pulled
        back to CPU before denormalization.
        """
        if len(images) == 0:
            warnings.warn("No images found!", stacklevel=2)
            return images

        self.descriptor = construct_descriptor(self.config["dataset"]["descriptor"])

        # Energies/forces are predicted, not read from the images.
        converter = AtomsToData(
            descriptor=self.descriptor,
            r_energy=False,
            r_forces=False,
            save_fps=self.config["dataset"].get("save_fps", True),
            fprimes=self.forcetraining,
            cores=1,
        )

        data_list = converter.convert_all(images, disable_tqdm=disable_tqdm)
        self.feature_scaler.norm(data_list, disable_tqdm=disable_tqdm)

        self.net.module.eval()
        collate = DataCollater(train=False, forcetraining=self.forcetraining)

        predictions = {"energy": [], "forces": []}
        for sample in data_list:
            batch = collate([sample]).to(self.device)
            energy, forces = self.net.module([batch])

            energy_out = self.target_scaler.denorm(energy.detach().cpu(),
                                                   pred="energy")
            forces_out = self.target_scaler.denorm(forces.detach().cpu(),
                                                   pred="forces")

            predictions["energy"].extend(energy_out.tolist())
            predictions["forces"].append(forces_out.numpy())

        return predictions
Example #6
0
class AtomsDataset(Dataset):
    """Torch-style dataset of fingerprinted atomic images.

    Each image is converted to a data object via ``AtomsToData``; features
    and targets are normalized in place during ``process``.
    """

    def __init__(
        self,
        images,
        descriptor_setup,
        forcetraining=True,
        save_fps=True,
        scaling=None,
        cores=1,
        process=True,
    ):
        """Build the dataset.

        Args:
            images: Iterable of ASE-compatible atoms objects to featurize.
            descriptor_setup: Descriptor configuration passed to
                ``construct_descriptor``.
            forcetraining: If True, also compute forces and fingerprint
                primes for force training.
            save_fps: Whether ``AtomsToData`` should save fingerprints.
            scaling: Feature-scaling spec; defaults to
                ``{"type": "normalize", "range": (0, 1)}``.
            cores: Number of worker processes used during conversion.
            process: If False, defer conversion; ``data_list`` stays None
                until ``process()`` is called explicitly.
        """
        self.images = images
        self.forcetraining = forcetraining
        # A dict default argument would be shared across all instances; build
        # the default per call instead (mutable-default-argument pitfall).
        self.scaling = (
            {"type": "normalize", "range": (0, 1)} if scaling is None else scaling
        )
        self.descriptor = construct_descriptor(descriptor_setup)

        self.a2d = AtomsToData(
            descriptor=self.descriptor,
            r_energy=True,
            r_forces=self.forcetraining,
            save_fps=save_fps,
            fprimes=forcetraining,
            cores=cores,
        )

        self.data_list = self.process() if process else None

    def process(self):
        """Convert all images, then fit and apply feature/target scalers."""
        data_list = self.a2d.convert_all(self.images)

        self.feature_scaler = FeatureScaler(data_list, self.forcetraining, self.scaling)
        self.target_scaler = TargetScaler(data_list, self.forcetraining)
        # Both scalers normalize the data objects in place.
        self.feature_scaler.norm(data_list)
        self.target_scaler.norm(data_list)

        return data_list

    @property
    def input_dim(self):
        """Number of fingerprint features per atom (second fingerprint dim)."""
        return self.data_list[0].fingerprint.shape[1]

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, index):
        return self.data_list[index]
Example #7
0
def construct_lmdb(images, normalizers, lmdb_path, sigmas_index, MCSHs_index):
    """Fingerprint *images* with a GMP descriptor and write them to an LMDB.

    Args:
        images: Iterable of ASE-compatible atoms objects.
        normalizers: Dict holding pre-fit "feature" and "target" scalers;
            both are applied in place to the converted data before writing.
        lmdb_path: Path of the LMDB file to create (single file, not a dir).
        sigmas_index: Key into the sigma-grid table below (the number of
            sigma values in the grid: 8, 10, 13, 19, or 37).
        MCSHs_index: Maximum MCSH order to include (2-5).

    Raises:
        KeyError: If ``sigmas_index`` or ``MCSHs_index`` is not a known key.
    """

    db = lmdb.open(
        lmdb_path,
        map_size=1099511627776 * 2,  # 2 TiB virtual map; file grows as needed
        subdir=False,
        meminit=False,
        map_async=True,
    )

    def put(key, value):
        # Commit each record in its own write transaction so a crash mid-run
        # leaves previously written entries intact.
        txn = db.begin(write=True)
        txn.put(key.encode("ascii"), pickle.dumps(value, protocol=-1))
        txn.commit()

    # Per-element pseudodensity gaussian fits used as GMP atomic gaussians.
    potential_files = {
        "H": "../valence_gaussians/H_pseudodensity_2.g",
        "Li": "../valence_gaussians/Li_pseudodensity_2.g",
        "Be": "../valence_gaussians/Be_pseudodensity_2.g",
        "B": "../valence_gaussians/B_pseudodensity_3.g",
        "C": "../valence_gaussians/C_pseudodensity_4.g",
        "N": "../valence_gaussians/N_pseudodensity_4.g",
        "O": "../valence_gaussians/O_pseudodensity_4.g",
        "F": "../valence_gaussians/F_pseudodensity_4.g",
        "Na": "../valence_gaussians/Na_pseudodensity_4.g",
        "Mg": "../valence_gaussians/Mg_pseudodensity_4.g",
        "Al": "../valence_gaussians/Al_pseudodensity_4.g",
        "Si": "../valence_gaussians/Si_pseudodensity_5.g",
        "P": "../valence_gaussians/P_pseudodensity_5.g",
        "S": "../valence_gaussians/S_pseudodensity_5.g",
        "Cl": "../valence_gaussians/Cl_pseudodensity_5.g",
        "K": "../valence_gaussians/K_pseudodensity_4.g",
        "Ca": "../valence_gaussians/Ca_pseudodensity_4.g",
        "Sc": "../valence_gaussians/Sc_pseudodensity_5.g",
        "Ti": "../valence_gaussians/Ti_pseudodensity_5.g",
        "V": "../valence_gaussians/V_pseudodensity_4.g",
        "Cr": "../valence_gaussians/Cr_pseudodensity_4.g",
        "Mn": "../valence_gaussians/Mn_pseudodensity_4.g",
        "Fe": "../valence_gaussians/Fe_pseudodensity_4.g",
        "Co": "../valence_gaussians/Co_pseudodensity_4.g",
        "Ni": "../valence_gaussians/Ni_pseudodensity_4.g",
        "Cu": "../valence_gaussians/Cu_pseudodensity_4.g",
        "Zn": "../valence_gaussians/Zn_pseudodensity_4.g",
        "Ga": "../valence_gaussians/Ga_pseudodensity_5.g",
        "Ge": "../valence_gaussians/Ge_pseudodensity_5.g",
        "As": "../valence_gaussians/As_pseudodensity_5.g",
        "Se": "../valence_gaussians/Se_pseudodensity_5.g",
        "Br": "../valence_gaussians/Br_pseudodensity_5.g",
        "Rb": "../valence_gaussians/Rb_pseudodensity_5.g",
        "Sr": "../valence_gaussians/Sr_pseudodensity_5.g",
        "Y": "../valence_gaussians/Y_pseudodensity_5.g",
        "Zr": "../valence_gaussians/Zr_pseudodensity_4.g",
        "Nb": "../valence_gaussians/Nb_pseudodensity_4.g",
        "Mo": "../valence_gaussians/Mo_pseudodensity_4.g",
        "Tc": "../valence_gaussians/Tc_pseudodensity_4.g",
        "Ru": "../valence_gaussians/Ru_pseudodensity_4.g",
        "Rh": "../valence_gaussians/Rh_pseudodensity_4.g",
        "Pd": "../valence_gaussians/Pd_pseudodensity_4.g",
        "Ag": "../valence_gaussians/Ag_pseudodensity_4.g",
        "Cd": "../valence_gaussians/Cd_pseudodensity_4.g",
        "In": "../valence_gaussians/In_pseudodensity_4.g",
        "Sn": "../valence_gaussians/Sn_pseudodensity_4.g",
        "Sb": "../valence_gaussians/Sb_pseudodensity_4.g",
        "Te": "../valence_gaussians/Te_pseudodensity_4.g",
        "I": "../valence_gaussians/I_pseudodensity_4.g",
        "Cs": "../valence_gaussians/Cs_pseudodensity_5.g",
        "Ba": "../valence_gaussians/Ba_pseudodensity_5.g",
        "Hf": "../valence_gaussians/Hf_pseudodensity_5.g",
        "Ta": "../valence_gaussians/Ta_pseudodensity_5.g",
        "W": "../valence_gaussians/W_pseudodensity_7.g",
        "Re": "../valence_gaussians/Re_pseudodensity_6.g",
        "Os": "../valence_gaussians/Os_pseudodensity_6.g",
        "Ir": "../valence_gaussians/Ir_pseudodensity_6.g",
        "Pt": "../valence_gaussians/Pt_pseudodensity_6.g",
        "Au": "../valence_gaussians/Au_pseudodensity_6.g",
        "Hg": "../valence_gaussians/Hg_pseudodensity_6.g",
        "Tl": "../valence_gaussians/Tl_pseudodensity_6.g",
        "Pb": "../valence_gaussians/Pb_pseudodensity_6.g",
        "Bi": "../valence_gaussians/Bi_pseudodensity_6.g",
    }

    # Candidate sigma grids, keyed by the number of sigmas in the grid.
    sigmas_dict = {
        37: [
            0.02, 0.05, 0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32, 0.36, 0.4,
            0.45, 0.5, 0.56, 0.62, 0.69, 0.76, 0.84, 0.92, 1.01, 1.1, 1.2, 1.3,
            1.4, 1.52, 1.66, 1.82, 2.0, 2.2, 2.42, 2.66, 2.92, 3.2, 3.5, 3.9,
            4.4, 5.0
        ],
        19: [
            0.02, 0.08, 0.16, 0.24, 0.32, 0.4, 0.5, 0.62, 0.76, 0.92, 1.1, 1.3,
            1.52, 1.82, 2.2, 2.66, 3.2, 3.9, 5.0
        ],
        13: [
            0.02, 0.12, 0.24, 0.36, 0.5, 0.69, 0.92, 1.2, 1.52, 2.0, 2.66, 3.5,
            5.0
        ],
        10: [0.02, 0.16, 0.32, 0.5, 0.76, 1.1, 1.52, 2.2, 3.2, 5.0],
        8: [0.02, 0.2, 0.4, 0.69, 1.1, 1.66, 2.66, 4.4],
    }

    sigmas = sigmas_dict[sigmas_index]

    # MCSH spec for each supported maximum order: channels "0".."order",
    # where channel i uses groups 1..i (channel "0" uses group [1]).  Built
    # programmatically instead of the equivalent hand-written literal.
    MCSHs_dict = {
        order: {
            str(i): {
                "groups": list(range(1, i + 1)) or [1],
                "sigmas": sigmas,
            }
            for i in range(order + 1)
        }
        for order in range(2, 6)
    }

    MCSHs = MCSHs_dict[MCSHs_index]
    MCSHs = {"MCSHs": MCSHs, "atom_gaussians": potential_files, "cutoff": 15.0}

    elements = [
        'Pt', 'Al', 'V', 'Pd', 'Fe', 'Sn', 'Ge', 'Bi', 'Ir', 'Re', 'Cd', 'Cr',
        'Ag', 'Hf', 'Ru', 'Ti', 'Cs', 'Os', 'N', 'As', 'O', 'S', 'Mo', 'Ta',
        'Zn', 'Y', 'Mn', 'Na', 'Rh', 'Hg', 'C', 'Co', 'Nb', 'Sc', 'Sr', 'H',
        'Au', 'Ga', 'Tl', 'K', 'Se', 'B', 'Pb', 'Ca', 'Cl', 'Cu', 'Zr', 'Rb',
        'P', 'W', 'Tc', 'Te', 'Ni', 'Sb', 'Si', 'In'
    ]

    forcetraining = False
    descriptor = GMP(MCSHs=MCSHs, elements=elements)
    descriptor_setup = ("gmp", MCSHs, {}, elements)

    a2d = AtomsToData(
        descriptor=descriptor,
        r_energy=True,
        r_forces=False,
        save_fps=False,
        fprimes=forcetraining,
    )

    # Convert every image to a data object (energies only, no forces).
    data_list = [
        a2d.convert(image, idx=idx)
        for idx, image in enumerate(
            tqdm(
                images,
                desc="converting images",
                total=len(images),
                unit=" images",
            )
        )
    ]

    feature_scaler = normalizers["feature"]
    target_scaler = normalizers["target"]

    # Scalers normalize the data objects in place.
    feature_scaler.norm(data_list)
    target_scaler.norm(data_list)

    for idx, do in enumerate(tqdm(data_list, desc="Writing images to LMDB")):
        put(f"{idx}", do)

    # Metadata records needed to reload and use the dataset later.
    put("feature_scaler", feature_scaler)
    put("target_scaler", target_scaler)
    put("length", len(data_list))
    put("elements", elements)
    put("descriptor_setup", descriptor_setup)

    db.sync()
    db.close()