Ejemplo n.º 1
0
    def __init__(
            self,
            dbpath,
            molecule=None,
            subset=None,
            download=True,
            collect_triples=False,
            load_only=None,
            environment_provider=spk.environment.SimpleEnvironmentProvider(),
    ):
        """
        Validate the arguments and initialise the parent dataset.

        Args:
            dbpath: path of the database; it has to exist on disk unless
                ``molecule`` is given.
            molecule: key of ``MD17.datasets_dict`` or None.
            subset: forwarded unchanged to the parent constructor.
            download: forwarded unchanged to the parent constructor.
            collect_triples: forwarded unchanged to the parent constructor.
            load_only: forwarded unchanged to the parent constructor.
            environment_provider: forwarded unchanged to the parent
                constructor.

        Raises:
            AtomsDataError: if ``dbpath`` does not exist and no molecule is
                selected, or if ``molecule`` is not a key of
                ``MD17.datasets_dict``.
        """
        # The path is checked first, exactly as before, so an invalid dbpath
        # surfaces before the molecule name is looked at.
        db_missing = not os.path.exists(dbpath)
        if db_missing and molecule is None:
            raise AtomsDataError(
                "Provide a valid dbpath or select desired molecule!")

        molecule_ok = molecule is None or molecule in MD17.datasets_dict
        if not molecule_ok:
            raise AtomsDataError(
                "Molecule {} is not supported!".format(molecule))

        self.molecule = molecule

        super(MD17, self).__init__(
            dbpath=dbpath,
            subset=subset,
            load_only=load_only,
            collect_triples=collect_triples,
            download=download,
            available_properties=[MD17.energy, MD17.forces],
            environment_provider=environment_provider,
        )
Ejemplo n.º 2
0
def get_property_map(properties, property_mapping, dbpath):
    """
    Provide property map from model properties to dataset properties.

    Args:
        properties (list): model properties
        property_mapping (dict or str): dict with all possible mappings, or
            a string of comma-separated ``model_property:dataset_property``
            pairs. Whitespace around the names in the string form is
            ignored.
        dbpath (str): path to the local database; only checked for None and
            used in the error message

    Returns:
        dict: The mapping dictionary with model properties as keys and
        dataset properties as values.

    Raises:
        AtomsDataError: if ``dbpath`` is None or if one of ``properties`` has
            no entry in ``property_mapping``.
        ValueError: if an entry of a string mapping does not consist of
            exactly one ``key:value`` pair.

    """
    if dbpath is None:
        raise AtomsDataError("Please define your database path.")

    if isinstance(property_mapping, str):
        # Strip the names so that "a:b, c:d" does not produce a " c" key.
        pairs = (entry.split(':') for entry in property_mapping.split(','))
        property_mapping = {key.strip(): value.strip()
                            for key, value in pairs}

    property_map = {}
    for prop in properties:
        if prop not in property_mapping:
            raise AtomsDataError('Invalid property mapping: "{}" is not a '
                                 'valid property of the database located at '
                                 '{}.'.format(prop, dbpath))
        property_map[prop] = property_mapping[prop]
    return property_map
Ejemplo n.º 3
0
 def download(self, dataset):
     """
     Hand ``dataset`` to ``self._load_data`` when nothing exists at
     ``self.dbpath`` yet.

     Args:
         dataset: passed through unchanged to ``self._load_data``.

     Raises:
         AtomsDataError: if ``self.dbpath`` already exists on disk.
     """
     # Refuse to touch an existing path instead of silently reusing it.
     if os.path.exists(self.dbpath):
         raise AtomsDataError("Dataset exists already at path " +
                              self.dbpath)
     self._load_data(dataset)
Ejemplo n.º 4
0
    def _download(self):
        """
        Load the data when nothing exists at ``self.dbpath`` yet and store
        the atom references as metadata.

        The references and their labels come from
        ``self._create_atoms_ref()`` and are written via
        ``self.set_metadata`` under the keys ``atomrefs`` and
        ``atref_labels``.

        Raises:
            AtomsDataError: if ``self.dbpath`` already exists on disk.
        """
        # Refuse to touch an existing path instead of silently reusing it.
        if os.path.exists(self.dbpath):
            raise AtomsDataError("Dataset exists already at path " +
                                 self.dbpath)
        self._load_data()

        reference, reference_labels = self._create_atoms_ref()
        metadata = {
            'atomrefs': reference.tolist(),
            'atref_labels': reference_labels,
        }
        self.set_metadata(metadata)
Ejemplo n.º 5
0
def create_subset(dataset, indices):
    r"""
    Wrap a selection of entries of an atomistic dataset in a subset object.

    Args:
        dataset (torch.utils.data.Dataset): dataset the subset is taken from
        indices (sequence): indices of the subset; no np.ndarrays, because
            the ase database can not handle np.int values

    Returns:
        spk.data.AtomsDataSubset: subset of input dataset

    Raises:
        AtomsDataError: if the largest index (taken as 0 when ``indices`` is
            empty) is not smaller than ``len(dataset)``

    """
    # len() rather than truthiness: works for any sized sequence.
    largest_index = max(indices) if len(indices) != 0 else 0
    if largest_index >= len(dataset):
        raise AtomsDataError(
            "The subset indices do not match the total length of the dataset!"
        )
    return AtomsDataSubset(dataset, indices)
Ejemplo n.º 6
0
def get_property_map(properties, property_mapping, dbpath):
    """
    Provide property map from model properties to dataset properties.

    Args:
        properties (list): model properties
        property_mapping (dict): dict with all possible mappings
        dbpath (str): path to the local database; only used in the error
            message

    Returns:
        dict: The mapping dictionary with model properties as keys and
        dataset properties as values.

    Raises:
        AtomsDataError: if one of ``properties`` has no entry in
            ``property_mapping``.

    """
    property_map = {}
    for prop in properties:
        if prop not in property_mapping:
            raise AtomsDataError('"{}" is not a valid property that is '
                                 'contained in the property_mapping for the '
                                 'database located at {}.'.format(prop, dbpath))
        property_map[prop] = property_mapping[prop]
    return property_map
Ejemplo n.º 7
0
    def _download(self):
        """
        Query the Materials Project and write the returned structures to the
        database at ``self.dbpath``.

        One query is issued for every combination of element count (1 to 8)
        and site-count window (``nsites`` bounds stepping by 30 up to 300).
        Every result whose ``structure`` entry is exactly of type
        ``Properties`` is converted to an ``Atoms`` object with periodic
        boundary conditions and written together with its energy per atom,
        formation energy per atom, total magnetization and band gap.
        Finally, empty metadata is set.

        Returns:
            None

        Raises:
            AtomsDataError: if ``self.apikey`` is None.
            ImportError: if the required pymatgen modules cannot be imported.
        """
        if self.apikey is None:
            raise AtomsDataError(
                "Provide a valid API key in order to download the "
                "Materials Project data!"
            )
        # Imported lazily so pymatgen is only required for downloading.
        try:
            from pymatgen.ext.matproj import MPRester
            from pymatgen.core import Properties
        except ImportError:
            # Only import failures are translated; anything else (e.g.
            # KeyboardInterrupt) propagates unchanged.
            raise ImportError(
                "In order to download Materials Project data, you have to install "
                "pymatgen"
            )

        with connect(self.dbpath) as con:
            with MPRester(self.apikey) as m:
                for N in range(1, 9):
                    for nsites in range(0, 300, 30):
                        # Presumably MongoDB-style exclusive bounds, i.e.
                        # nsites < n < nsites + 31 -- confirm against the
                        # MPRester query documentation.
                        ns = {"$lt": nsites + 31, "$gt": nsites}
                        query = m.query(
                            criteria={
                                "nelements": N,
                                "is_compatible": True,
                                "nsites": ns,
                            },
                            properties=[
                                "structure",
                                "energy_per_atom",
                                "formation_energy_per_atom",
                                "total_magnetization",
                                "band_gap",
                                "material_id",
                                "warnings",
                            ],
                        )

                        for q in query:
                            s = q["structure"]
                            # NOTE(review): exact type match kept from the
                            # original; results with any other structure type
                            # are skipped silently.
                            if type(s) is not Properties:
                                continue
                            at = Atoms(
                                numbers=s.atomic_numbers,
                                positions=s.cart_coords,
                                cell=s.lattice.matrix,
                                pbc=True,
                            )
                            con.write(
                                at,
                                data={
                                    MaterialsProject.EPerAtom: q["energy_per_atom"],
                                    MaterialsProject.EformationPerAtom: q[
                                        "formation_energy_per_atom"
                                    ],
                                    MaterialsProject.TotalMagnetization: q[
                                        "total_magnetization"
                                    ],
                                    MaterialsProject.BandGap: q["band_gap"],
                                },
                            )
        self.set_metadata({})