def __init__(
    self,
    dbpath,
    molecule=None,
    subset=None,
    download=True,
    collect_triples=False,
    load_only=None,
    environment_provider=spk.environment.SimpleEnvironmentProvider(),
):
    """
    Set up the MD17 dataset for a single molecule.

    Args:
        dbpath (str): path to the local database
        molecule (str, optional): name of the molecule; has to be a key of
            ``MD17.datasets_dict``. It may only be omitted if ``dbpath``
            already exists.
        subset: forwarded unchanged to the parent constructor
        download: forwarded unchanged to the parent constructor
        collect_triples: forwarded unchanged to the parent constructor
        load_only: forwarded unchanged to the parent constructor
        environment_provider: forwarded unchanged to the parent constructor

    Raises:
        AtomsDataError: if neither an existing ``dbpath`` nor a molecule is
            given, or if the molecule is not listed in
            ``MD17.datasets_dict``.
    """
    # Without an existing database there is nothing to open, so a molecule
    # name is mandatory in that case.
    db_missing = not os.path.exists(dbpath)
    if db_missing and molecule is None:
        raise AtomsDataError("Provide a valid dbpath or select desired molecule!")

    if molecule is not None:
        if molecule not in MD17.datasets_dict.keys():
            raise AtomsDataError("Molecule {} is not supported!".format(molecule))

    self.molecule = molecule

    # The molecule has to be stored on the instance before the parent
    # constructor runs; everything else is handed over as keywords.
    parent_kwargs = dict(
        dbpath=dbpath,
        subset=subset,
        load_only=load_only,
        collect_triples=collect_triples,
        download=download,
        available_properties=[MD17.energy, MD17.forces],
        environment_provider=environment_provider,
    )
    super(MD17, self).__init__(**parent_kwargs)
def get_property_map(properties, property_mapping, dbpath):
    """
    Provide property map from model properties to dataset properties.

    Args:
        properties (list): model properties
        property_mapping (dict or str): dict with all possible mappings, or
            a string of comma-separated ``model_property:dataset_property``
            pairs, e.g. ``"energy:energy_U0,forces:forces"``
        dbpath (str): path to the local database; apart from the ``None``
            check it is only used in the error message

    Returns:
        dict: The mapping dictionary with model properties as keys and
        dataset properties as values.

    Raises:
        AtomsDataError: if ``dbpath`` is None or if one of ``properties``
            has no entry in ``property_mapping``.
        ValueError: if a string mapping contains an entry that does not
            split into exactly one ``key:value`` pair.
    """
    if dbpath is None:
        raise AtomsDataError("Please define your database path.")

    # isinstance instead of an exact type comparison, so that str subclasses
    # are parsed as well instead of failing later on the missing `.keys()`.
    if isinstance(property_mapping, str):
        property_mapping = {
            key: value
            for key, value in (
                pair.split(":") for pair in property_mapping.split(",")
            )
        }

    property_map = {}
    for prop in properties:
        if prop not in property_mapping:
            raise AtomsDataError(
                'Invalid property mapping: "{}" is not a valid property of '
                "the database located at {}.".format(prop, dbpath)
            )
        property_map[prop] = property_mapping[prop]
    return property_map
def download(self, dataset):
    """
    Load the data for ``dataset`` unless something already exists at
    ``self.dbpath``.

    Args:
        dataset: passed on unchanged to ``self._load_data``

    Raises:
        AtomsDataError: if ``self.dbpath`` already exists on disk.
    """
    # Refuse to touch an existing path instead of overwriting it.
    if os.path.exists(self.dbpath):
        raise AtomsDataError("Dataset exists already at path " + self.dbpath)
    self._load_data(dataset)
def _download(self):
    """
    Load the data into ``self.dbpath`` and store the atom references as
    metadata.

    Raises:
        AtomsDataError: if ``self.dbpath`` already exists on disk.
    """
    # Refuse to touch an existing path instead of overwriting it.
    if os.path.exists(self.dbpath):
        raise AtomsDataError("Dataset exists already at path " + self.dbpath)
    self._load_data()

    # The reference values are converted with `tolist()` before being
    # written to the metadata.
    reference, reference_labels = self._create_atoms_ref()
    metadata = {
        "atomrefs": reference.tolist(),
        "atref_labels": reference_labels,
    }
    self.set_metadata(metadata)
def create_subset(dataset, indices):
    r"""
    Build a subset view on an atomistic dataset.

    Args:
        dataset (torch.utils.data.Dataset): dataset
        indices (sequence): indices of the subset; no np.ndarrays, because
            the ase database can not handle np.int values

    Returns:
        spk.data.AtomsDataSubset: subset of input dataset

    Raises:
        AtomsDataError: if the largest index is not smaller than
            ``len(dataset)``.
    """
    # An empty selection is treated as if its largest index were 0.
    # NOTE(review): as a consequence, an empty selection on an empty dataset
    # raises as well - confirm that this is intended.
    largest = max(indices) if len(indices) != 0 else 0
    if len(dataset) <= largest:
        raise AtomsDataError(
            "The subset indices do not match the total length of the dataset!"
        )
    return AtomsDataSubset(dataset, indices)
def get_property_map(properties, property_mapping, dbpath):
    """
    Provide property map from model properties to dataset properties.

    Args:
        properties (list): model properties
        property_mapping (dict): dict with all possible mappings
        dbpath (str): path to the local database; only used in the error
            message

    Returns:
        dict: The mapping dictionary with model properties as keys and
        dataset properties as values.

    Raises:
        AtomsDataError: if one of ``properties`` has no entry in
            ``property_mapping``.
    """
    property_map = {}
    for prop in properties:
        if prop not in property_mapping:
            raise AtomsDataError(
                '"{}" is not a valid property that is contained in the '
                "property_mapping for the database located at {}.".format(
                    prop, dbpath
                )
            )
        property_map[prop] = property_mapping[prop]
    return property_map
def _download(self):
    """
    Download Materials Project data and write it to the database at
    ``self.dbpath``.

    The Materials Project is queried for compatible entries with 1 to 8
    elements, in windows of 30 sites each (1-30, 31-60, ..., 271-300).
    Every returned structure is converted to an ``Atoms`` object and stored
    together with its energy per atom, formation energy per atom, total
    magnetization and band gap. Afterwards empty metadata is set.

    Returns:
        None

    Raises:
        AtomsDataError: if ``self.apikey`` is None.
        ImportError: if the required pymatgen names cannot be imported.
    """
    if self.apikey is None:
        raise AtomsDataError(
            "Provide a valid API key in order to download the "
            "Materials Project data!"
        )

    # Only import failures are translated into the installation hint; any
    # other exception (including KeyboardInterrupt) propagates untouched.
    try:
        from pymatgen.ext.matproj import MPRester

        # NOTE(review): confirm that pymatgen.core exports `Properties`; if
        # it does not, this import fails and is reported as missing pymatgen
        # (the chained cause below shows the real reason).
        from pymatgen.core import Properties
    except ImportError as err:
        raise ImportError(
            "In order to download Materials Project data, you have to install "
            "pymatgen"
        ) from err

    requested_properties = [
        "structure",
        "energy_per_atom",
        "formation_energy_per_atom",
        "total_magnetization",
        "band_gap",
        "material_id",
        "warnings",
    ]

    with connect(self.dbpath) as con, MPRester(self.apikey) as m:
        for N in range(1, 9):
            for nsites in range(0, 300, 30):
                # Exclusive bounds: selects nsites + 1 ... nsites + 30 sites.
                ns = {"$lt": nsites + 31, "$gt": nsites}
                query = m.query(
                    criteria={
                        "nelements": N,
                        "is_compatible": True,
                        "nsites": ns,
                    },
                    properties=requested_properties,
                )

                for q in query:
                    s = q["structure"]
                    # Exact type match on purpose (kept from the original):
                    # results of any other type are skipped silently.
                    if type(s) is Properties:
                        at = Atoms(
                            numbers=s.atomic_numbers,
                            positions=s.cart_coords,
                            cell=s.lattice.matrix,
                            pbc=True,
                        )
                        con.write(
                            at,
                            data={
                                MaterialsProject.EPerAtom: q["energy_per_atom"],
                                MaterialsProject.EformationPerAtom: q[
                                    "formation_energy_per_atom"
                                ],
                                MaterialsProject.TotalMagnetization: q[
                                    "total_magnetization"
                                ],
                                MaterialsProject.BandGap: q["band_gap"],
                            },
                        )
    self.set_metadata({})