def __init__(
    self,
    dbpath,
    xyzpath,
    subset=None,
    properties=None,
    environment_provider=SimpleEnvironmentProvider(),
    pair_provider=None,
    center_positions=True,
):
    """
    Dataset backed by an extended-XYZ file, converted to an ASE db on first use.

    Args:
        dbpath (str): path of the ASE db to create/open.
        xyzpath (str): extended-XYZ file parsed into the db when it is missing.
        subset (list, optional): indices of a data subset.
        properties (list, optional): properties passed to the base class as
            ``load_only``.
        environment_provider (callable): neighbor-list provider.
        pair_provider: forwarded as ``collect_triples`` to the base class
            (NOTE(review): the name/keyword mismatch looks historical — confirm).
        center_positions (bool): center positions on the molecule's center.
    """
    available_properties = [ExtXYZ.E, ExtXYZ.F]
    units = [1.0, 1.0]

    if not os.path.exists(dbpath):
        # os.makedirs("") raises FileNotFoundError, so only create the parent
        # directory when dbpath actually contains one.
        dbdir = os.path.dirname(dbpath)
        if dbdir:
            os.makedirs(dbdir, exist_ok=True)
        extxyz_to_db(dbpath, xyzpath)

    super(ExtXYZ, self).__init__(
        dbpath=dbpath,
        subset=subset,
        load_only=properties,
        environment_provider=environment_provider,
        collect_triples=pair_provider,
        center_positions=center_positions,
        available_properties=available_properties,
        units=units,
    )
def __init__(self, model, device="cpu", collect_triples=False,
             environment_provider=SimpleEnvironmentProvider(),
             energy=None, forces=None, stress=None,
             energy_units="eV", forces_units="eV/Angstrom",
             stress_units="eV/Angstrom/Angstrom/Angstrom", **kwargs):
    """
    ASE calculator backed by a SchNetPack model.

    ``energy``/``forces``/``stress`` name the model outputs holding each
    property; the ``*_units`` strings describe the units of those outputs.
    Remaining kwargs are forwarded to ase's Calculator base class.
    """
    Calculator.__init__(self, **kwargs)

    # Model and input conversion must live on the same device.
    self.model = model
    self.model.to(device)
    self.atoms_converter = AtomsConverter(
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        device=device,
    )

    self.model_energy = energy
    self.model_forces = forces
    self.model_stress = stress

    # MDUnits converts the given unit strings to atomic units; multiplying by
    # ase's Ha/Bohr constants then yields ASE-internal units (eV, Angstrom).
    to_atomic = MDUnits.parse_mdunit
    self.energy_units = to_atomic(energy_units) * units.Ha
    self.forces_units = to_atomic(forces_units) * units.Ha / units.Bohr
    self.stress_units = to_atomic(stress_units) * units.Ha / units.Bohr ** 3
def __init__(self, environment_provider=SimpleEnvironmentProvider(),
             collect_triples=False, device=torch.device('cpu')):
    """
    Converter from ASE atoms to SchNetPack inputs.

    Args:
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): also collect atom triples for angular features.
        device (torch.device): device the converted tensors are placed on.
    """
    self.device = device
    self.collect_triples = collect_triples
    self.environment_provider = environment_provider
def __init__(
    self,
    dbpath,
    subset=None,
    available_properties=None,
    load_only=None,
    units=None,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    centering_function=get_center_of_mass,
):
    """
    Dataset backed by an ASE .db file.

    Args:
        dbpath (str): path to the ASE db; must end in ".db".
        subset (list, optional): indices of a data subset.
        available_properties (list, optional): properties stored in the db;
            autodetected via ``get_available_properties`` when None.
        load_only (list, optional): properties to load; defaults to all
            available properties.
        units (list, optional): conversion factor per available property;
            defaults to 1.0 each.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        centering_function (callable): computes the center subtracted from
            positions.

    Raises:
        AtomsDataError: if dbpath lacks the ".db" extension, or the number
            of units does not match the number of available properties.
    """
    if not dbpath.endswith(".db"):
        raise AtomsDataError(
            "Invalid dbpath! Please make sure to add the file extension '.db' to "
            "your dbpath.")

    self.dbpath = dbpath
    self.subset = subset
    self.load_only = load_only
    self.available_properties = self.get_available_properties(
        available_properties)
    if load_only is None:
        self.load_only = self.available_properties

    if units is None:
        units = [1.0] * len(self.available_properties)
    # Fail loudly on a units/properties count mismatch instead of letting
    # zip() silently drop entries (same check as the newer AtomsData variant).
    if len(units) != len(self.available_properties):
        raise AtomsDataError(
            "The length of available properties and units does not match!")
    self.units = dict(zip(self.available_properties, units))

    self.environment_provider = environment_provider
    self.collect_triples = collect_triples
    self.centering_function = centering_function
def __init__(self, model, device="cpu", collect_triples=False,
             environment_provider=SimpleEnvironmentProvider(),
             energy=None, forces=None,
             energy_units="eV", forces_units="eV/Angstrom", **kwargs):
    """
    ASE calculator backed by a SchNetPack model (energy/forces only).

    Args:
        model: trained SchNetPack model.
        device (str): computation device (default "cpu").
        collect_triples (bool): collect atom triples for angular features.
        environment_provider (callable): neighbor-list provider.
        energy (str, optional): model output name for the energy.
        forces (str, optional): model output name for the forces.
        energy_units (str): units of the model's energy output.
        forces_units (str): units of the model's forces output.
        **kwargs: forwarded to ase's Calculator base class.
    """
    Calculator.__init__(self, **kwargs)
    self.model = model
    # Move the model to the requested device so it matches the inputs
    # produced by the AtomsConverter (the other calculator variants in this
    # file already do this; omitting it leaves the model on its old device).
    self.model.to(device)
    self.atoms_converter = AtomsConverter(
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        device=device,
    )
    self.model_energy = energy
    self.model_forces = forces
    # Convert to ASE internal units: parse_mdunit yields atomic units, which
    # the Ha/Bohr factors turn into eV and eV/Angstrom.
    self.energy_units = MDUnits.parse_mdunit(energy_units) * units.Ha
    self.forces_units = MDUnits.parse_mdunit(
        forces_units) * units.Ha / units.Bohr
def __init__(self, datapath, dataset, subset=None, download=True,
             collect_triples=False, parse_all=False, properties=None):
    """
    MD17 dataset for one of the known molecules.

    Raises:
        ValueError: if ``dataset`` is not a known dataset name.
    """
    self.load_all = parse_all
    self.datapath = datapath

    if dataset not in self.existing_datasets:
        raise ValueError(
            "Unknown dataset specification {:s}".format(dataset))

    self.dataset = dataset
    self.database = dataset + ".db"
    self.dbpath = os.path.join(self.datapath, self.database)
    self.collect_triples = collect_triples

    if download:
        self.download()

    # Default to loading energies and forces.
    super(MD17, self).__init__(
        self.dbpath,
        subset,
        ["energy", "forces"] if properties is None else properties,
        SimpleEnvironmentProvider(),
        collect_triples,
    )
def __init__(self, path, fold, subset=None, download=True, collect_triples=False):
    """
    ISO17 dataset for a single fold.

    Raises:
        ValueError: if ``fold`` is not one of the known folds.
    """
    if fold not in self.existing_folds:
        raise ValueError("Fold {:s} does not exist".format(fold))

    self.path = path
    self.fold = fold
    self.datapath = os.path.join(path, "iso17")
    self.database = "{}.db".format(fold)
    self.dbpath = os.path.join(self.datapath, self.database)

    if download:
        self.download()

    # ISO17 stores total energies and atomic forces.
    super().__init__(
        self.dbpath,
        subset,
        ["total_energy", "atomic_forces"],
        SimpleEnvironmentProvider(),
        collect_triples,
    )
def __init__(self, positions, atom_types, modelpaths,
             device=torch.device('cpu'),
             environment_provider=SimpleEnvironmentProvider(),
             collect_triples=False, hessian=False,
             nac_approx=None, adaptive=None, thresholds=None,
             print_uncertainty=None):
    """
    SchNarculator driven by an ensemble of models.

    Args:
        positions: initial atomic positions.
        atom_types: atomic numbers/types of the molecule.
        modelpaths (Iterable): one path per ensemble member.
        device (torch.device): computation device.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        hessian (bool): request hessian computation.
        nac_approx (list, optional): [method, dE_S threshold, dE_T threshold];
            defaults to [1, None, None]. A None default avoids sharing one
            mutable list across all instances.
        adaptive / thresholds / print_uncertainty: adaptive-sampling options.

    Raises:
        SchNarculatorError: if ``modelpaths`` is not an iterable of paths.

    NOTE(review): device/environment_provider/collect_triples are accepted but
    not forwarded to the base class (they were commented out upstream) —
    confirm whether the base applies its own defaults.
    """
    if nac_approx is None:
        nac_approx = [1, None, None]

    # Check whether a list of modelpaths has been passed
    if not isinstance(modelpaths, Iterable):
        raise SchNarculatorError(
            'List of modelpaths required for ensemble calculator.')

    super(EnsembleSchNarculator, self).__init__(positions, atom_types,
                                                modelpath=modelpaths,
                                                hessian=hessian,
                                                nac_approx=nac_approx,
                                                adaptive=adaptive,
                                                thresholds=thresholds,
                                                print_uncertainty=print_uncertainty)

    self.n_models = len(self.model_all)
    self.uncertainty = {}
def __init__(self, model, device="cpu", collect_triples=False,
             environment_provider=SimpleEnvironmentProvider(),
             energy=None, forces=None, stress=None,
             energy_units="eV", forces_units="eV/Angstrom",
             stress_units="eV/Angstrom/Angstrom/Angstrom", **kwargs):
    """
    ASE calculator backed by a SchNetPack model.

    ``energy``/``forces``/``stress`` name the model outputs holding each
    property; the ``*_units`` strings describe the units of those outputs.
    Remaining kwargs are forwarded to ase's Calculator base class.
    """
    Calculator.__init__(self, **kwargs)

    # Keep model and converted inputs on the same device.
    self.model = model
    self.model.to(device)
    self.atoms_converter = AtomsConverter(
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        device=device,
    )

    self.model_energy = energy
    self.model_forces = forces
    self.model_stress = stress

    # Conversion factors from the user-supplied units into ASE internal
    # units (energy: eV, length: Angstrom).
    convert = MDUnits.unit2unit
    self.energy_units = convert(energy_units, "eV")
    self.forces_units = convert(forces_units, "eV/Angstrom")
    self.stress_units = convert(stress_units, "eV/A/A/A")
def prepare_data(self):
    """Load the raw npy data, wrap it, split it, and set per-sample weights."""
    print("Preparing data...", flush=True)

    raw = NpysDataset2(self.hparams.coords, self.hparams.forces,
                       self.hparams.embeddings)
    self.dataset = SchNetDataset(
        raw,
        environment_provider=SimpleEnvironmentProvider(),
        label=["forces"],
    )

    # Deterministic train/val/test split, persisted next to the logs.
    splits_file = os.path.join(self.hparams.log_dir, "splits.npz")
    self.idx_train, self.idx_val, self.idx_test = make_splits(
        len(self.dataset),
        self.hparams.val_ratio,
        self.hparams.test_ratio,
        self.hparams.seed,
        splits_file,
        self.hparams.splits,
    )

    self.train_dataset = torch.utils.data.Subset(self.dataset, self.idx_train)
    self.val_dataset = torch.utils.data.Subset(self.dataset, self.idx_val)
    self.test_dataset = torch.utils.data.Subset(self.dataset, self.idx_test)
    print("train {}, val {}, test {}".format(
        len(self.train_dataset), len(self.val_dataset), len(self.test_dataset)))

    # Per-sample weights: from file if given, otherwise uniform.
    if self.hparams.weights is None:
        self.weights = torch.ones(len(self.dataset))
    else:
        self.weights = torch.from_numpy(np.load(self.hparams.weights))
def __init__(self, path, download=True, subset=None, properties=None,
             collect_triples=False, remove_uncharacterized=False):
    """
    QM9 dataset.

    Args:
        path (str): directory holding qm9.db, atomref.npz and evilmols.npy.
        download (bool): download the data if missing.
        subset (list, optional): indices of a data subset.
        properties (list, optional): properties to load; defaults to []
            (all). A None default avoids sharing one mutable list across
            every instance (the previous ``properties=[]`` pitfall).
        collect_triples (bool): collect atom triples for angular features.
        remove_uncharacterized (bool): drop the uncharacterized molecules
            listed in evilmols.npy from the subset.
    """
    if properties is None:
        properties = []

    self.path = path
    self.dbpath = os.path.join(self.path, 'qm9.db')
    self.atomref_path = os.path.join(self.path, 'atomref.npz')
    self.evilmols_path = os.path.join(self.path, 'evilmols.npy')
    self.required_properties = properties
    environment_provider = SimpleEnvironmentProvider()

    if download:
        self._download()

    if remove_uncharacterized:
        if subset is None:
            with connect(self.dbpath) as con:
                subset = np.arange(con.count())
        else:
            subset = np.array(subset)
        evilmols = np.load(self.evilmols_path)
        # attention: 1-indexing vs 0-indexing
        subset = np.setdiff1d(subset, evilmols - 1)

    super().__init__(self.dbpath, subset, self.required_properties,
                     environment_provider, collect_triples)
def __init__(
    self,
    dbpath,
    subset=None,
    load_only=None,
    available_properties=None,
    units=None,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    centering_function=get_center_of_mass,
    download=False,
):
    """
    AtomsData variant that can fetch its database on construction.

    All arguments except ``download`` are forwarded unchanged to the
    AtomsData base class; when ``download`` is True the dataset's
    ``download()`` hook is invoked afterwards.
    """
    super().__init__(
        dbpath=dbpath,
        subset=subset,
        available_properties=available_properties,
        load_only=load_only,
        units=units,
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        centering_function=centering_function,
    )

    if download:
        self.download()
def __init__(
    self,
    molecule_path,
    ml_model,
    working_dir,
    device="cpu",
    energy="energy",
    forces="forces",
    energy_units="eV",
    forces_units="eV/Angstrom",
    environment_provider=SimpleEnvironmentProvider(),
):
    """
    Set up a simulation of one molecule driven by a SchNetPack calculator.

    Args:
        molecule_path (str): structure file loaded via ``_load_molecule``.
        ml_model: trained model handed to SpkCalculator.
        working_dir (str): output directory; created if missing.
        device (str): computation device for the model (default "cpu").
        energy (str): model output name for the energy.
        forces (str): model output name for the forces.
        energy_units (str): units of the model's energy output.
        forces_units (str): units of the model's forces output.
        environment_provider (callable): neighbor-list provider.
    """
    # Setup directory. exist_ok avoids the check-then-create race of the
    # previous os.path.exists() guard.
    self.working_dir = working_dir
    os.makedirs(self.working_dir, exist_ok=True)

    # Load the molecule
    self.molecule = None
    self._load_molecule(molecule_path)

    # Set up calculator
    calculator = SpkCalculator(
        ml_model,
        device=device,
        energy=energy,
        forces=forces,
        energy_units=energy_units,
        forces_units=forces_units,
        environment_provider=environment_provider,
    )
    self.molecule.set_calculator(calculator)

    # Unless initialized, set dynamics to False
    self.dynamics = False
def _convert_atoms(
    atoms,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    center_positions=False,
    output=None,
):
    """
    Helper function to convert ASE atoms object to SchNetPack input format.

    Args:
        atoms (ase.Atoms): Atoms object of molecule
        environment_provider (callable): Neighbor list provider.
        collect_triples (bool): also collect neighbor-pair indices for
            angular (triple) features.
        center_positions (bool): subtract the center of mass from positions.
        output (dict): Destination for converted atoms, if not None

    Returns:
        dict of torch.Tensor: Properties including neighbor lists and masks
            reformated into SchNetPack input format.
    """
    inputs = {} if output is None else output

    # Elemental composition.
    # np.int was removed in NumPy >= 1.24; np.int64 matches torch.LongTensor.
    cell = np.array(atoms.cell.array, dtype=np.float32)  # get cell array
    inputs[Properties.Z] = torch.LongTensor(atoms.numbers.astype(np.int64))
    positions = atoms.positions.astype(np.float32)
    if center_positions:
        positions -= atoms.get_center_of_mass()
    inputs[Properties.R] = torch.FloatTensor(positions)

    # get atom environment
    nbh_idx, offsets = environment_provider.get_environment(atoms)

    # Get neighbors and neighbor mask
    inputs[Properties.neighbors] = torch.LongTensor(nbh_idx.astype(np.int64))

    # Get cells (the original assigned Properties.cell twice; once suffices)
    inputs[Properties.cell] = torch.FloatTensor(cell)
    inputs[Properties.cell_offset] = torch.FloatTensor(offsets.astype(np.float32))

    # If requested get neighbor lists for triples
    if collect_triples:
        nbh_idx_j, nbh_idx_k, offset_idx_j, offset_idx_k = collect_atom_triples(nbh_idx)
        inputs[Properties.neighbor_pairs_j] = torch.LongTensor(nbh_idx_j.astype(np.int64))
        inputs[Properties.neighbor_pairs_k] = torch.LongTensor(nbh_idx_k.astype(np.int64))
        inputs[Properties.neighbor_offsets_j] = torch.LongTensor(
            offset_idx_j.astype(np.int64)
        )
        inputs[Properties.neighbor_offsets_k] = torch.LongTensor(
            offset_idx_k.astype(np.int64)
        )

    return inputs
def _convert_atoms(
    atoms,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    centering_function=None,
    output=None,
):
    """
    Helper function to convert ASE atoms object to SchNetPack input format.

    Args:
        atoms (ase.Atoms): Atoms object of molecule
        environment_provider (callable): Neighbor list provider.
        collect_triples (bool, optional): Set to True if angular features are needed.
        centering_function (callable or None): Function for calculating center of
            molecule (center of mass/geometry/...). Center will be subtracted from
            positions.
        output (dict): Destination for converted atoms, if not None

    Returns:
        dict of torch.Tensor: Properties including neighbor lists and masks
            reformated into SchNetPack input format.
    """
    if output is None:
        inputs = {}
    else:
        inputs = output

    # Elemental composition.
    # np.int was removed in NumPy >= 1.24; np.int64 keeps 64-bit indices.
    inputs[Properties.Z] = atoms.numbers.astype(np.int64)
    positions = atoms.positions.astype(np.float32)
    if centering_function:
        positions -= centering_function(atoms)
    inputs[Properties.R] = positions

    # get atom environment
    nbh_idx, offsets = environment_provider.get_environment(atoms)

    # Get neighbors and neighbor mask
    inputs[Properties.neighbors] = nbh_idx.astype(np.int64)

    # Get cells
    inputs[Properties.cell] = np.array(atoms.cell.array, dtype=np.float32)
    inputs[Properties.cell_offset] = offsets.astype(np.float32)

    # If requested get neighbor lists for triples
    if collect_triples:
        nbh_idx_j, nbh_idx_k, offset_idx_j, offset_idx_k = collect_atom_triples(
            nbh_idx)
        inputs[Properties.neighbor_pairs_j] = nbh_idx_j.astype(np.int64)
        inputs[Properties.neighbor_pairs_k] = nbh_idx_k.astype(np.int64)
        inputs[Properties.neighbor_offsets_j] = offset_idx_j.astype(np.int64)
        inputs[Properties.neighbor_offsets_k] = offset_idx_k.astype(np.int64)

    return inputs
def __init__(self, ml_model, environment_provider=SimpleEnvironmentProvider(),
             **kwargs):
    """
    ASE calculator wrapping a pre-configured ML model handle.

    The wrapped handle supplies the torch model, its device, and its type
    ('wacsf' models need angular triples). Remaining kwargs go to the ase
    Calculator base class.
    """
    Calculator.__init__(self, **kwargs)
    self.model = ml_model.model
    # wACSF descriptors require angular (triple) features.
    self.atoms_converter = AtomsConverter(
        environment_provider=environment_provider,
        collect_triples=(ml_model.type == 'wacsf'),
        device=ml_model.device,
    )
def __init__(
    self,
    data,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    center_positions=False,
):
    """
    Hold raw data together with the settings used to convert it.

    Args:
        data: underlying data source.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        center_positions (bool): subtract the center of mass from positions.
    """
    self.center_positions = center_positions
    self.collect_triples = collect_triples
    self.environment_provider = environment_provider
    self.data = data
def __init__(
    self,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    device=torch.device("cpu"),
    res_list=None,
):
    """
    Converter from ASE atoms to SchNetPack inputs.

    Args:
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        device (torch.device): device the converted tensors are placed on.
        res_list (optional): residue list forwarded to the conversion step.
    """
    self.device = device
    self.res_list = res_list
    self.collect_triples = collect_triples
    self.environment_provider = environment_provider
def __init__(self, dbpath, xyzpath, subset=None, properties=None,
             environment_provider=SimpleEnvironmentProvider(),
             pair_provider=None, center_positions=True):
    """
    Dataset backed by an extended-XYZ file, parsed into an ASE db on first use.

    Args:
        dbpath (str): path of the ASE db to create/open.
        xyzpath (str): extended-XYZ file parsed when the db is missing.
        subset (list, optional): indices of a data subset.
        properties (list, optional): properties to load; defaults to [].
            A None default avoids the shared-mutable-default pitfall of
            the previous ``properties=[]`` signature.
        environment_provider (callable): neighbor-list provider.
        pair_provider: forwarded to the base class.
        center_positions (bool): center positions on the molecule.
    """
    if properties is None:
        properties = []

    if not os.path.exists(dbpath):
        # os.path.dirname is portable (the previous '/'-split broke on
        # Windows paths) and exist_ok avoids crashing when the directory
        # already exists; skip creation entirely for bare filenames.
        dbdir = os.path.dirname(dbpath)
        if dbdir:
            os.makedirs(dbdir, exist_ok=True)
        parse_extxyz(dbpath, xyzpath)

    super(ExtXYZ, self).__init__(dbpath, subset, properties,
                                 environment_provider, pair_provider,
                                 center_positions)
def __init__(
    self,
    dbpath,
    subset=None,
    available_properties=None,
    load_only=None,
    units=None,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    centering_function=get_center_of_mass,
):
    """
    Dataset backed by an ASE .db file (subset argument deprecated).

    Args:
        dbpath (str): path to the ASE db; must end in ".db".
        subset: deprecated; passing anything but None raises.
        available_properties (list, optional): properties stored in the db;
            autodetected when None.
        load_only (list, optional): properties to load.
        units (list, optional): conversion factor per available property;
            defaults to 1.0 each.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        centering_function (callable): computes the center subtracted from
            positions.

    Raises:
        AtomsDataError: on a bad dbpath, a non-None subset, or a
            units/properties length mismatch.
    """
    # checks
    if not dbpath.endswith(".db"):
        raise AtomsDataError(
            "Invalid dbpath! Please make sure to add the file extension '.db' to "
            "your dbpath."
        )
    if subset is not None:
        raise AtomsDataError(
            "The subset argument is deprecated and can not be used anymore! "
            "Please use spk.data.partitioning.create_subset or "
            "spk.data.AtomsDataSubset to build subsets."
        )

    # database
    self.dbpath = dbpath

    # check if database is deprecated:
    if self._is_deprecated():
        self._deprecation_update()

    self._load_only = load_only
    self._available_properties = self._get_available_properties(
        available_properties
    )

    if units is None:
        units = [1.0] * len(self.available_properties)
    # Validate BEFORE building the dict: the original checked after
    # dict(zip(...)), which had already silently truncated the mismatch.
    if len(units) != len(self.available_properties):
        raise AtomsDataError(
            "The length of available properties and units does not match!"
        )
    self.units = dict(zip(self.available_properties, units))

    # environment
    self.environment_provider = environment_provider
    self.collect_triples = collect_triples
    self.centering_function = centering_function
def __init__(self, model, device="cpu", collect_triples=False,
             environment_provider=SimpleEnvironmentProvider(), **kwargs):
    """
    Minimal ASE calculator wrapping a SchNetPack model.

    Args:
        model: trained SchNetPack model.
        device (str): device for the converted inputs (default "cpu").
        collect_triples (bool): collect atom triples for angular features.
        environment_provider (callable): neighbor-list provider.
        **kwargs: forwarded to ase's Calculator base class.
    """
    Calculator.__init__(self, **kwargs)
    self.model = model
    converter = AtomsConverter(
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        device=device,
    )
    self.atoms_converter = converter
def __init__(self, dbpath, subset=None, required_properties=None,
             environment_provider=SimpleEnvironmentProvider(),
             collect_triples=False, center_positions=True,
             load_charge=False, download=False):
    """
    AtomsData variant that can fetch its database on construction.

    All arguments except ``download`` are forwarded unchanged to the base
    class; when ``download`` is True the dataset's ``download()`` hook is
    invoked afterwards.
    """
    super().__init__(dbpath, subset, required_properties,
                     environment_provider, collect_triples,
                     center_positions, load_charge)

    if download:
        self.download()
def __init__(self, dbpath, xyzpath, subset=None, properties=None,
             environment_provider=SimpleEnvironmentProvider(),
             pair_provider=None, center_positions=True):
    """
    Dataset backed by an extended-XYZ file, converted to an ASE db on first use.

    Args:
        dbpath (str): path of the ASE db to create/open.
        xyzpath (str): extended-XYZ file converted when the db is missing.
        subset (list, optional): indices of a data subset.
        properties (list, optional): properties to load; defaults to [].
            A None default avoids the shared-mutable-default pitfall of
            the previous ``properties=[]`` signature.
        environment_provider (callable): neighbor-list provider.
        pair_provider: forwarded to the base class.
        center_positions (bool): center positions on the molecule.
    """
    if properties is None:
        properties = []

    if not os.path.exists(dbpath):
        # os.makedirs("") raises FileNotFoundError, so only create the
        # parent directory when dbpath actually contains one.
        dbdir = os.path.dirname(dbpath)
        if dbdir:
            os.makedirs(dbdir, exist_ok=True)
        extxyz_to_db(dbpath, xyzpath, db_properties=self.available_properties)

    super(ExtXYZ, self).__init__(dbpath, subset, properties,
                                 environment_provider, pair_provider,
                                 center_positions)
def __init__(self, dbpath, download=True, subset=None, properties=None,
             collect_triples=False, remove_uncharacterized=False):
    """
    QM9 dataset.

    Args:
        dbpath (str): path to the qm9 ASE db.
        download (bool): download the data if the db file is missing.
        subset (list, optional): indices of a data subset.
        properties (list, optional): properties to load; defaults to [].
            A None default avoids sharing one mutable list across every
            instance (the previous ``properties=[]`` pitfall).
        collect_triples (bool): collect atom triples for angular features.
        remove_uncharacterized (bool): forwarded to the download step.
    """
    if properties is None:
        properties = []

    self.dbpath = dbpath
    self.required_properties = properties
    environment_provider = SimpleEnvironmentProvider()

    if not os.path.exists(dbpath) and download:
        self._download(remove_uncharacterized)

    super().__init__(self.dbpath, subset, self.required_properties,
                     environment_provider, collect_triples)
def __init__(self, dbpath, subset=None, required_properties=None,
             environment_provider=SimpleEnvironmentProvider(),
             collect_triples=False, center_positions=True, load_charge=False):
    """
    Dataset backed by an ASE .db file.

    Args:
        dbpath (str): path to the ASE db.
        subset (list, optional): indices of a data subset.
        required_properties (list, optional): properties to load; defaults
            to all available properties.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        center_positions (bool): center positions on the molecule.
        load_charge (bool): also load total charges.
    """
    self.dbpath = dbpath
    self.subset = subset
    if required_properties is None:
        required_properties = self.available_properties
    self.required_properties = required_properties
    self.environment_provider = environment_provider
    self.collect_triples = collect_triples
    self.centered = center_positions
    self.load_charge = load_charge
def __init__(self, dbpath, dataset=None, subset=None, download=True,
             collect_triples=False, properties=None):
    """
    MD17 dataset.

    Initializes the base dataset on ``dbpath`` and optionally downloads the
    requested molecule's data afterwards.
    """
    self.collect_triples = collect_triples

    super(MD17, self).__init__(
        dbpath,
        subset,
        MD17.properties if properties is None else properties,
        SimpleEnvironmentProvider(),
        collect_triples,
    )

    if download:
        self.download(dataset)
def __init__(self, path, download=True, subset=None, properties=None,
             collect_triples=False, num_heavy_atoms=8, high_energies=False):
    """
    ANI-1 dataset.

    Args:
        path (str): directory holding ani1.db and atomrefs.npz.
        download (bool): download the data if missing.
        subset (list, optional): indices of a data subset.
        properties (list, optional): properties to load; defaults to
            ['energy']. A None default avoids sharing one mutable list
            across every instance (the previous ``properties=['energy']``
            pitfall).
        collect_triples (bool): collect atom triples for angular features.
        num_heavy_atoms (int): maximum number of heavy atoms to include.
        high_energies (bool): include high-energy conformations.
    """
    if properties is None:
        properties = ['energy']

    self.path = path
    self.atomref_path = os.path.join(self.path, "atomrefs.npz")
    self.dbpath = os.path.join(self.path, 'ani1.db')
    self.num_heavy_atoms = num_heavy_atoms
    self.high_energies = high_energies
    environment_provider = SimpleEnvironmentProvider()

    if download:
        self._download()

    super().__init__(self.dbpath, subset, properties, environment_provider,
                     collect_triples)
def __init__(
    self,
    dbpath,
    subset=None,
    available_properties=None,
    load_only=None,
    cutoff=10.0,
    units=None,
    environment_provider=None,
    collect_triples=False,
    center_positions=False,
):
    """
    Dataset over an arbitrary ASE db, inferring properties and periodicity.

    The ``available_properties`` and ``environment_provider`` arguments are
    accepted for signature compatibility but are overwritten: properties are
    read from the first db row, and the provider is chosen from the db's
    periodicity (AseEnvironmentProvider with ``cutoff`` for periodic data,
    SimpleEnvironmentProvider otherwise).
    """
    with connect(dbpath) as conn:
        first_row = conn.get(1)
        # Non-private entries of the first row define the property set.
        available_properties = [
            name for name in first_row.data if not name.startswith("_")
        ]
        self.pbc = any(row["pbc"].any() for row in conn.select())
        environment_provider = (
            AseEnvironmentProvider(cutoff=cutoff)
            if self.pbc
            else SimpleEnvironmentProvider()
        )

    if collect_triples is None:
        raise ValueError(
            "The variable collect_triples has not been defined.")

    super(CustomDataset, self).__init__(
        dbpath=dbpath,
        subset=subset,
        available_properties=available_properties,
        load_only=load_only,
        units=units,
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        center_positions=center_positions,
    )
def __init__(
    self,
    dbpath,
    subset=None,
    available_properties=None,
    load_only=None,
    units=None,
    environment_provider=SimpleEnvironmentProvider(),
    collect_triples=False,
    center_positions=True,
):
    """
    Dataset backed by an ASE .db file.

    Args:
        dbpath (str): path to the ASE db.
        subset (list, optional): indices of a data subset.
        available_properties (list, optional): properties stored in the db;
            autodetected via ``get_available_properties`` when None.
        load_only (list, optional): properties to load; defaults to all
            available properties.
        units (list, optional): conversion factor per available property;
            defaults to 1.0 each.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        center_positions (bool): center positions on the molecule.
    """
    self.dbpath = dbpath
    self.subset = subset

    props = self.get_available_properties(available_properties)
    self.available_properties = props
    self.load_only = props if load_only is None else load_only

    if units is None:
        units = [1.0] * len(props)
    self.units = dict(zip(props, units))

    self.environment_provider = environment_provider
    self.collect_triples = collect_triples
    self.center_positions = center_positions
def __init__(self, positions, atom_types, modelpath,
             device=torch.device('cpu'),
             environment_provider=SimpleEnvironmentProvider(),
             collect_triples=False, hessian=False,
             nac_approx=[1, None, None], adaptive=None, thresholds=None,
             print_uncertainty=None):
    """
    Set up a SchNarc calculator around one (or, in adaptive mode, several)
    trained models for a molecule given by positions and atom types.

    Args:
        positions: atomic positions; n_atoms is taken from positions.shape[0].
        atom_types: atomic numbers used to build the ase Atoms object.
        modelpath: path(s) passed to ``_load_model``.
        device (torch.device): computation device.
        environment_provider (callable): neighbor-list provider.
        collect_triples (bool): collect atom triples for angular features.
        hessian (bool): request hessian computation from the model.
        nac_approx (list): [approx. method, dE threshold singlets,
            dE threshold triplets]. NOTE(review): mutable default list is
            shared across instances; it is only read here, but confirm no
            caller mutates it.
        adaptive: adaptive-sampling switch; when set, ``modelpath`` is
            expected to yield an indexable collection of models.
        thresholds / print_uncertainty: adaptive-sampling options.
    """
    self.device = device
    # Load model
    self.parallel = False
    self.model = self._load_model(modelpath)
    self.adaptive = adaptive
    self.thresholds = thresholds
    self.print_uncertainty = print_uncertainty
    if self.adaptive is not None:
        if not self.parallel:
            # Adaptive mode: keep the full collection in model_all and use
            # the first member as the primary model.
            self.n_states_dict = self.model[0].output_modules[0].n_states
            self.model_all = self.model
            self.model = self.model[0]
        else:
            print("Parallel mode not implemented for adaptive sampling.")
    else:
        if not self.parallel:
            self.n_states_dict = self.model.output_modules[0].n_states
        else:
            # DataParallel-style wrappers expose the net via .module.
            self.n_states_dict = self.model.module.output_modules[
                0].n_states
    # Unpack the state counts reported by the model's output module.
    self.n_states = self.n_states_dict['n_states']
    self.n_singlets = self.n_states_dict['n_singlets']
    self.n_triplets = self.n_states_dict['n_triplets']
    self.n_atoms = positions.shape[0]
    self.environment_provider = environment_provider
    self.collect_triples = collect_triples
    # nac_approx = [method, dE threshold singlets, dE threshold triplets]
    self.threshold_dE_S = nac_approx[1]
    self.threshold_dE_T = nac_approx[2]
    # Packed hessian request: flag plus state counts and thresholds, in the
    # layout the output modules expect.
    self.hessian = [
        hessian, self.n_singlets, self.threshold_dE_S, self.n_triplets,
        self.threshold_dE_T
    ]
    self.nacs_approx_method = nac_approx[0]
    # Enable the hessian flag if requested and set need_hessian in old models
    if self.parallel:
        if not hasattr(self.model.module.output_modules, 'need_hessian'):
            self.model.module.output_modules.need_hessian = self.hessian
        if hessian:
            if schnarc.data.Properties.energy in self.model.module.output_modules[
                    0].output_dict:
                self.model.module.output_modules[0].output_dict[
                    schnarc.data.Properties.
                    energy].return_hessian = self.hessian
    else:
        if not hasattr(self.model.output_modules, 'need_hessian'):
            self.model.output_modules.need_hessian = self.hessian
        if hessian:
            if schnarc.data.Properties.energy in self.model.output_modules[
                    0].output_dict:
                self.model.output_modules[0].output_dict[
                    schnarc.data.Properties.
                    energy].return_hessian = self.hessian
    self.molecule = Atoms(atom_types, positions)