def __init__(
    self,
    dbpath,
    cutoff,
    apikey=None,
    download=True,
    subset=None,
    properties=None,
    collect_triples=False,
):
    self.cutoff = cutoff
    self.apikey = apikey
    self.dbpath = dbpath
    environment_provider = AseEnvironmentProvider(cutoff)
    if properties is None:
        properties = MaterialsProject.available_properties
    if download and not os.path.exists(self.dbpath):
        self._download()
    super(MaterialsProject, self).__init__(
        self.dbpath, subset, properties, environment_provider, collect_triples
    )
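# A minimal usage sketch for the constructor above. The database path, cutoff
# value, and API key are assumptions, not values from the source.
from schnetpack.datasets import MaterialsProject

dataset = MaterialsProject(
    "materials_project.db",    # assumed local path for the .db file
    cutoff=5.0,                # neighbor cutoff (Angstrom) passed to AseEnvironmentProvider
    apikey="YOUR_MP_API_KEY",  # placeholder Materials Project API key
    download=True,
)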
def __init__(
    self,
    path,
    cutoff,
    download=True,
    subset=None,
    properties=None,
    collect_triples=False,
):
    if properties is None:  # avoid a mutable default argument
        properties = []
    self.path = path
    self.cutoff = cutoff
    self.dbpath = self.path.replace(".tar.gz", ".db")
    if not os.path.exists(self.path) and not os.path.exists(self.dbpath):
        raise FileNotFoundError(
            "Download the OMDB dataset (e.g. OMDB-GAP1.tar.gz) from "
            "https://omdb.diracmaterials.org/dataset/ and set path to this file"
        )
    environment_provider = AseEnvironmentProvider(cutoff)
    if download and not os.path.exists(self.dbpath):
        # Convert the OMDB .tar.gz archive into an ASE .db file
        self._convert()
    super(OrganicMaterialsDatabase, self).__init__(
        self.dbpath, subset, properties, environment_provider, collect_triples
    )
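# A usage sketch for the OMDB dataset above; the archive filename and cutoff
# are assumptions. The .tar.gz is converted to a .db the first time it is used.
from schnetpack.datasets import OrganicMaterialsDatabase

omdb = OrganicMaterialsDatabase(
    "OMDB-GAP1.tar.gz",  # assumed location of the downloaded archive
    cutoff=5.0,
    download=True,
)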
def __init__(
    self,
    dbpath,
    cutoff,
    apikey=None,
    download=True,
    subset=None,
    load_only=None,
    collect_triples=False,
):
    available_properties = [
        MaterialsProject.EformationPerAtom,
        MaterialsProject.EPerAtom,
        MaterialsProject.BandGap,
        MaterialsProject.TotalMagnetization,
    ]
    units = [eV, eV, eV, 1.0]

    self.cutoff = cutoff
    self.apikey = apikey

    environment_provider = AseEnvironmentProvider(cutoff)

    super(MaterialsProject, self).__init__(
        dbpath=dbpath,
        subset=subset,
        load_only=load_only,
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        available_properties=available_properties,
        units=units,
        download=download,
    )
def determine_unique_configurations(configurations):
    # Use half the largest cell vector (plus a margin) as the cutoff
    cutoff = float(np.max(configurations[0].cell.array) / 2 + 1)
    unique_reps, unique_config, reps, count_configs = [], [], [], []
    schnet = SchNet(
        n_atom_basis=32,
        n_filters=32,
        n_interactions=1,
        cutoff=cutoff,
        cutoff_network=CosineCutoff,
    )
    env = AseEnvironmentProvider(cutoff=cutoff)
    data = [posinp_to_ase_atoms(pos) for pos in configurations]
    data = SchnetPackData(data=data, environment_provider=env, collect_triples=False)
    data_loader = AtomsLoader(data, batch_size=1)
    # Compute a SchNet representation for each configuration
    for batch in data_loader:
        reps.append(torch.squeeze(schnet(batch)))
    # Group configurations whose representations match; the for/else adds a
    # new unique entry only when no existing representation compares equal.
    for i, rep in enumerate(reps):
        for j, uni in enumerate(unique_reps):
            if compare_reps(rep, uni):
                count_configs[j] += 1
                break
        else:
            unique_reps.append(rep)
            unique_config.append(configurations[i])
            count_configs.append(1)
    return unique_config, count_configs
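# A hypothetical call to determine_unique_configurations(); the input files
# are assumptions, read into Posinp objects as the function expects.
configurations = [Posinp.from_file(f) for f in ["pos1.xyz", "pos2.xyz"]]  # assumed files
unique, counts = determine_unique_configurations(configurations)
print(f"{len(unique)} unique configurations, multiplicities: {counts}")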
def get_dos(
    model,
    posinp,
    device="cpu",
    supercell=(6, 6, 6),
    qpoints=(30, 30, 30),
    npts=1000,
    width=0.004,
):
    if isinstance(posinp, str):
        atoms = posinp_to_ase_atoms(Posinp.from_file(posinp))
    elif isinstance(posinp, Posinp):
        atoms = posinp_to_ase_atoms(posinp)
    else:
        raise ValueError("The posinp variable is not recognized.")

    if isinstance(model, str):
        model = load_model(model, map_location=device)
    elif isinstance(model, torch.nn.Module):
        pass
    else:
        raise ValueError("The model variable is not recognized.")

    # Bugfix to make older models work with PyTorch 1.6.
    # Hopefully temporary.
    for mod in model.modules():
        if not hasattr(mod, "_non_persistent_buffers_set"):
            mod._non_persistent_buffers_set = set()

    assert len(supercell) == 3, "Supercell should be a length 3 object."
    assert len(qpoints) == 3, "Qpoints should be a length 3 object."
    supercell = tuple(supercell)

    # Recover the cutoff used at training time from the model weights
    cutoff = float(
        model.state_dict()["representation.interactions.0.cutoff_network.cutoff"]
    )
    calculator = SpkCalculator(
        model,
        device=device,
        energy="energy",
        forces="forces",
        environment_provider=AseEnvironmentProvider(cutoff),
    )
    ph = Phonons(atoms, calculator, supercell=supercell, delta=0.02)
    ph.run()
    ph.read(acoustic=True)
    dos = ph.get_dos(kpts=qpoints).sample_grid(npts=npts, width=width)
    ph.clean()
    # Convert energies from eV to cm^-1 (1 eV ~ 8065.6 cm^-1)
    return Dos(dos.energy * 8065.6, dos.weights[0])
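# A hypothetical invocation of get_dos(); the model file and structure file
# names are assumptions. Returns a phonon density of states in cm^-1.
dos = get_dos(
    "best_model",     # assumed path to a trained SchNetPack model
    "structure.xyz",  # assumed position file readable by Posinp.from_file
    supercell=(6, 6, 6),
    qpoints=(30, 30, 30),
)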
def __init__(
    self,
    dbpath,
    subset=None,
    available_properties=None,
    load_only=None,
    cutoff=10.0,
    units=None,
    environment_provider=None,
    collect_triples=False,
    center_positions=False,
):
    with connect(dbpath) as conn:
        atom_test = conn.get(1)
        # Infer the available properties from the first row of the database,
        # skipping private keys that start with "_".
        available_properties = [
            prop for prop in list(atom_test.data.keys()) if not prop.startswith("_")
        ]
        self.pbc = any(row["pbc"].any() for row in conn.select())
    # Periodic structures need the ASE neighbor list; free boundary
    # conditions can use the cheaper SimpleEnvironmentProvider.
    environment_provider = (
        AseEnvironmentProvider(cutoff=cutoff)
        if self.pbc
        else SimpleEnvironmentProvider()
    )
    if collect_triples is None:
        raise ValueError("The variable collect_triples has not been defined.")
    super(CustomDataset, self).__init__(
        dbpath=dbpath,
        subset=subset,
        available_properties=available_properties,
        load_only=load_only,
        units=units,
        environment_provider=environment_provider,
        collect_triples=collect_triples,
        center_positions=center_positions,
    )
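# A usage sketch for the CustomDataset above; the .db filename is an
# assumption. The environment provider is chosen automatically from the
# periodic boundary conditions stored in the database.
from schnetpack import AtomsLoader

dataset = CustomDataset("my_structures.db", cutoff=6.0, collect_triples=False)
loader = AtomsLoader(dataset, batch_size=32, shuffle=True)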
def run(
    self,
    property,
    posinp=None,
    batch_size=128,
):
    r"""
    Central method to use when making a calculation with
    the calculator.

    Parameters
    ----------
    property : str
        Property to be predicted by the calculator
    posinp : Posinp
        Atomic configuration to pass to the model
    batch_size : int
        Batch size. Default is 128.

    Returns
    -------
    predictions : :class:`numpy.ndarray`
        Corresponding prediction by the model.
    """
    init_property, out_name, derivative, wrt = get_derivative_names(
        property, self.available_properties
    )
    if abs(derivative) >= 1:
        self.model.output_modules[0].create_graph = True
    # Derivatives are computed one configuration at a time
    if len(posinp) > 1 and derivative:
        batch_size = 1

    data = [posinp_to_ase_atoms(pos) for pos in posinp]
    pbc = any(pos.pbc.any() for pos in data)
    environment_provider = (
        AseEnvironmentProvider(cutoff=self.cutoff)
        if pbc
        else SimpleEnvironmentProvider()
    )
    data = SchnetPackData(
        data=data,
        environment_provider=environment_provider,
        collect_triples=self.model_type == "wacsf",
    )
    data_loader = AtomsLoader(data, batch_size=batch_size)

    pred = []
    if derivative == 0:
        if self.model.output_modules[0].derivative is not None:
            for batch in data_loader:
                batch = {k: v.to(self.device) for k, v in batch.items()}
                pred.append(self.model(batch))
        else:
            with torch.no_grad():
                for batch in data_loader:
                    batch = {k: v.to(self.device) for k, v in batch.items()}
                    pred.append(self.model(batch))
    if abs(derivative) == 1:
        for batch in data_loader:
            batch = {k: v.to(self.device) for k, v in batch.items()}
            batch[wrt[0]].requires_grad_()
            results = self.model(batch)
            deriv1 = torch.unsqueeze(
                torch_derivative(results[init_property], batch[wrt[0]]), 0
            )
            if derivative < 0:
                deriv1 = -1.0 * deriv1
            pred.append({out_name: deriv1})
    if abs(derivative) == 2:
        for batch in data_loader:
            batch = {k: v.to(self.device) for k, v in batch.items()}
            for inp in set(wrt):
                batch[inp].requires_grad_()
            results = self.model(batch)
            deriv2 = torch.unsqueeze(
                torch_derivative(
                    torch_derivative(
                        results[init_property],
                        batch[wrt[0]],
                        create_graph=True,
                    ),
                    batch[wrt[0]],
                ),
                0,
            )
            if derivative < 0:
                deriv2 = -1.0 * deriv2
            pred.append({out_name: deriv2})

    predictions = {}
    if self.md:
        for p in ["energy", "forces"]:
            predictions[p] = np.concatenate(
                [batch[p].cpu().detach().numpy() for batch in pred]
            )
    else:
        if derivative:
            predictions[property] = np.concatenate(
                [batch[out_name].cpu().detach().numpy() for batch in pred]
            )
        else:
            predictions[property] = np.concatenate(
                [batch[init_property].cpu().detach().numpy() for batch in pred]
            )
    return predictions
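# A hypothetical call to the run() method above, assuming `calc` is an
# already-constructed calculator instance of the class this method belongs to
# and that the input file exists; both are assumptions.
pos = Posinp.from_file("structure.xyz")  # assumed input file
predictions = calc.run(property="forces", posinp=[pos])
print(predictions["forces"])  # stacked force predictions, one block per configuration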
def run(
    self,
    property,
    posinp=None,
    batch_size=1,
):
    r"""
    Central method to use when making a calculation with
    the calculator.

    Parameters
    ----------
    property : str
        Property to be predicted by the calculator
    posinp : Posinp
        Atomic configuration to pass to the model

    Returns
    -------
    predictions : :class:`numpy.ndarray`
        Corresponding prediction by the model.
    """
    # Initial setup
    assert (
        len(posinp) == 1
    ), "Use the PatchSPCalculator for one configuration at a time."
    atoms = posinp_to_ase_atoms(posinp[0])

    if property == "hessian" and any(self.subgrid == 2):
        # warnings.warn returns None, so it must not be raised
        warnings.warn(
            """
            The hessian matrix can have some bad values with a grid of
            size 2 because the same atom can be copied multiple times
            in the buffers of the same subcell. Use a larger grid.
            """
        )

    init_property, out_name, derivative, wrt = get_derivative_names(
        property, self.available_properties
    )
    if abs(derivative) >= 1:
        self.model.output_modules[0].create_graph = True

    pbc = atoms.pbc.any()
    environment_provider = (
        AseEnvironmentProvider(cutoff=self.cutoff)
        if pbc
        else SimpleEnvironmentProvider()
    )

    # Split the configuration according to the subgrid
    at_to_patches = AtomsToPatches(
        cutoff=self.cutoff, n_interaction=self.n_interaction, grid=self.subgrid
    )
    (
        subcells,
        subcells_main_idx,
        original_cell_idx,
        complete_subcell_copy_idx,
    ) = at_to_patches.split_atoms(atoms)

    # Pass each subcell independently
    results = []
    for subcell in subcells:
        data = SchnetPackData(
            data=[subcell],
            environment_provider=environment_provider,
            collect_triples=self.model_type == "wacsf",
        )
        data_loader = AtomsLoader(data, batch_size=1)
        if derivative == 0:
            if self.model.output_modules[0].derivative is not None:
                for batch in data_loader:
                    batch = {k: v.to(self.device) for k, v in batch.items()}
                    results.append(self.model(batch))
            else:
                with torch.no_grad():
                    for batch in data_loader:
                        batch = {k: v.to(self.device) for k, v in batch.items()}
                        results.append(self.model(batch))
        if abs(derivative) == 1:
            for batch in data_loader:
                batch = {k: v.to(self.device) for k, v in batch.items()}
                batch[wrt[0]].requires_grad_()
                forward_results = self.model(batch)
                deriv1 = torch_derivative(
                    forward_results[init_property], batch[wrt[0]]
                )
                if derivative < 0:
                    deriv1 = -1.0 * deriv1
                results.append({out_name: deriv1})
        if abs(derivative) == 2:
            raise NotImplementedError()

    # Stitch the patch results back into full-cell predictions
    predictions = {}
    if property == "energy":
        predictions["energy"] = np.sum(
            [
                patch["individual_energy"][subcells_main_idx[i]]
                .detach()
                .cpu()
                .numpy()
                for i, patch in enumerate(results)
            ]
        )
    elif property == "forces":
        forces = np.zeros((len(atoms), 3))
        for i in range(len(results)):
            forces[original_cell_idx[i]] = (
                results[i]["forces"]
                .detach()
                .squeeze()
                .cpu()
                .numpy()[subcells_main_idx[i]]
            )
        predictions["forces"] = forces
    elif property == "hessian":
        hessian = np.zeros((3 * len(atoms), 3 * len(atoms)))
        for i in range(len(results)):
            (
                hessian_original_cell_idx_0,
                hessian_original_cell_idx_1,
            ) = prepare_hessian_indices(
                original_cell_idx[i], complete_subcell_copy_idx[i]
            )
            (
                hessian_subcells_main_idx_0,
                hessian_subcells_main_idx_1,
            ) = prepare_hessian_indices(
                subcells_main_idx[i],
                np.arange(0, len(complete_subcell_copy_idx[i])),
            )
            hessian[hessian_original_cell_idx_0, hessian_original_cell_idx_1] = (
                results[i]["hessian"]
                .detach()
                .squeeze()
                .cpu()
                .numpy()[hessian_subcells_main_idx_0, hessian_subcells_main_idx_1]
            )
        predictions["hessian"] = hessian
    else:
        raise NotImplementedError()
    return predictions
def predict(
    modelpath,
    posinp,
    name=None,
    device="cpu",
    disk_out=True,
    batch_size=128,
    overwrite=False,
    return_values=False,
):
    if overwrite:
        to_remove = [dat for dat in os.listdir() if dat.endswith(".db")]
        for f in to_remove:
            os.remove(f)

    model = load_model(modelpath, device=device)
    # Infer the model type and cutoff from the state dict
    if "representation.cutoff.cutoff" in model.state_dict().keys():
        model_type = "wacsf"
        cutoff = float(model.state_dict()["representation.cutoff.cutoff"])
    elif any(
        key in model.state_dict().keys()
        for key in [
            "module.representation.embedding.weight",
            "representation.embedding.weight",
        ]
    ):
        model_type = "schnet"
        try:
            cutoff = float(
                model.state_dict()[
                    "module.representation.interactions.0.cutoff_network.cutoff"
                ]
            )
        except KeyError:
            cutoff = float(
                model.state_dict()[
                    "representation.interactions.0.cutoff_network.cutoff"
                ]
            )
    else:
        raise NotImplementedError("Model type is not recognized.")

    if isinstance(posinp, str):
        if posinp.endswith(".xyz"):
            # str.strip() removes characters, not a suffix, so slice instead
            name = os.path.basename(posinp)[: -len(".xyz")]
            pos = mybigdft.Posinp.from_file(posinp)
            pbc = False if pos.boundary_conditions == "free" else True
            data = [pos]
        elif posinp.endswith(".db"):
            name = os.path.basename(posinp)[: -len(".db")]
            data = connect(posinp)
            pbc = True if any(row["pbc"].any() for row in data.select()) else False
        else:
            raise NotImplementedError("File format not supported.")
    elif isinstance(posinp, list):
        if name is None or name == "":
            name = "structures"
        if all([isinstance(pos, mybigdft.Posinp) for pos in posinp]):
            data = [sim.mb_posinp_to_ase_atoms(pos) for pos in posinp]
        else:
            raise TypeError(
                "Posinp should be a list of exclusively mybigdft.Posinp instances."
            )
        pbc = True if any(pos.pbc.any() for pos in data) else False
    else:
        raise TypeError(
            """
            Positions should be given either as a path to a file or database,
            or as a list of mybigdft.Posinp instances.
            """
        )

    environment_provider = (
        AseEnvironmentProvider(cutoff=cutoff)
        if pbc
        else SimpleEnvironmentProvider()
    )
    data = BigdftAtomsData(
        data=data,
        environment_provider=environment_provider,
        collect_triples=model_type == "wacsf",
    )
    data_loader = AtomsLoader(data, batch_size=batch_size)

    with torch.no_grad():
        pred = []
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            pred.append(model(batch))

    predictions = {"idx": np.arange(1, len(data) + 1)}
    for property in list(pred[0].keys()):
        predictions[property] = np.concatenate(
            [p[property].cpu().numpy() for p in pred]
        )

    if disk_out:
        outfile = name + ".out"
        with open(outfile, "w") as file:
            wr = csv.writer(file)
            wr.writerow(list(predictions.keys()))
            wr.writerows(
                zip(*[predictions[property] for property in list(predictions.keys())])
            )
    if return_values:
        return predictions
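# A hypothetical invocation of predict(); the model and structure paths are
# assumptions. By default the predictions are also written to a <name>.out
# CSV file on disk.
results = predict(
    "best_model",     # assumed path to a trained model
    "structures.db",  # assumed ASE database of input configurations
    device="cpu",
    return_values=True,
)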
# Set up directory
if not os.path.exists(args.simulation_dir):
    os.makedirs(args.simulation_dir)

# Store command line args
spk.utils.to_json(jsonpath, argparse_dict)

# Load the model
ml_model = torch.load(args.model_path)
logging.info("Loaded model.")
logging.info(
    "The model you built has: {:d} parameters".format(spk.utils.count_params(ml_model))
)

if args.enable_pbc:
    environment = AseEnvironmentProvider(cutoff=args.cutoff)
else:
    environment = SimpleEnvironmentProvider()

# Initialize the ML ase interface
ml_calculator = spk.interfaces.AseInterface(
    args.molecule_path,
    ml_model,
    args.simulation_dir,
    args.device,
    args.energy,
    args.forces,
    environment_provider=environment,
)
logging.info("Initialized ase driver")
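# A hypothetical command line for the script above; the script name and all
# paths are assumptions, while the flag names mirror the args used in the
# snippet:
#
#   python run_ase_interface.py --model_path best_model \
#       --molecule_path molecule.xyz --simulation_dir ./sim \
#       --device cpu --enable_pbc --cutoff 5.0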
m = metric.aggregate()
if hasattr(m, "__iter__"):
    log_dict[metric.name] = [j for j in m]
else:
    log_dict[metric.name] = m
wandb.log(log_dict)

# %%
# from schnetpack.datasets import MD17
from schnetpack.datasets import AtomsData

cspbbr3_data = AtomsData(
    "40-cspbbr3.db", environment_provider=AseEnvironmentProvider(cutoff=cutoff)
)
# ethanol_data = MD17(os.path.join(forcetut, 'ethanol.db'), molecule='ethanol')

# %%
# for i in range(len(cspbbr3_data)):
example = cspbbr3_data[0]
print(f"Properties of molecule with id {0}:")
for k, v in example.items():
    print("-", k, ":", v.shape)

# example = ethanol_data[0]
# print('Properties of molecule with id 0:')
# for k, v in example.items():
#     print('-', k, ':', v.shape)