def __getitem__(self, idx: int) -> tuple:
    """Get an entry out of the Dataset.

    Args:
        idx (int): index of entry in Dataset

    Returns:
        tuple: containing
        - tuple[Tensor, Tensor, LongTensor, LongTensor]: Roost model inputs
          (element weights, element features, self indices, neighbour indices)
        - list[Tensor | LongTensor]: regression or classification targets
        - str | int: identifiers like material_id, composition. These are the
          values read from the ``self.identifiers`` columns and are unpacked
          as the trailing elements of the returned tuple (not nested in a list).

    Raises:
        AssertionError: if building the element feature matrix raises an
            AssertionError; re-raised with the offending entry named.
        ValueError: if building the element feature matrix raises a
            ValueError; re-raised with the offending entry named.
    """
    row = self.df.iloc[idx]
    composition = row[self.inputs]
    cry_ids = row[self.identifiers].to_list()

    comp_dict = Composition(composition).get_el_amt_dict()
    elements = list(comp_dict.keys())
    amounts = list(comp_dict.values())
    # Column vector of element amounts normalised by their sum.
    weights = np.atleast_2d(amounts).T / np.sum(amounts)

    try:
        elem_fea = np.vstack([self.elem_features[element] for element in elements])
    except AssertionError as err:
        # Chain the original error so the root cause stays in the traceback.
        raise AssertionError(
            f"cry-id {cry_ids[0]} [{composition}] contains element types not in embedding"
        ) from err
    except ValueError as err:
        # np.vstack raises ValueError for an empty sequence, i.e. when no
        # elements were obtained from the composition.
        raise ValueError(
            f"cry-id {cry_ids[0]} [{composition}] composition cannot be parsed into elements"
        ) from err

    # Every ordered pair (i, j) of element positions, including i == j:
    # self_idx holds i repeated nele times, nbr_idx cycles through 0..nele-1.
    nele = len(elements)
    self_idx = [i for i in range(nele) for _ in range(nele)]
    nbr_idx = list(range(nele)) * nele

    # convert all data to tensors
    elem_weights = Tensor(weights)
    elem_fea = Tensor(elem_fea)
    self_idx = LongTensor(self_idx)
    nbr_idx = LongTensor(nbr_idx)

    targets = []
    for target in self.task_dict:
        task_type = self.task_dict[target]
        if task_type == "regression":
            targets.append(Tensor([row[target]]))
        elif task_type == "classification":
            targets.append(LongTensor([row[target]]))
        # Any other task type contributes no target.

    return (
        (elem_weights, elem_fea, self_idx, nbr_idx),
        targets,
        *cry_ids,
    )
def _get_framework(formula, ignored_species) -> str:
    """Return the reduced formula of the entry without any of the ignored species.

    Args:
        formula: Formula passed to ``Composition`` to obtain the species
            and their amounts.
        ignored_species: Iterable of species keys (as they appear in
            ``Composition(formula).as_dict()``) to strip from the formula.

    Returns:
        str: Reduced formula of the remaining species, or ``"ignored"`` if
        all the atoms are ignored (i.e. nothing remains after stripping).
    """
    ignored = set(ignored_species)
    framework = {
        species: amount
        for species, amount in Composition(formula).as_dict().items()
        if species not in ignored
    }
    # Test what is left rather than comparing the key set for equality with
    # the ignored set: a formula containing only *some* of the ignored
    # species is fully ignored as well.
    if not framework:
        return "ignored"
    return Composition.from_dict(framework).reduced_formula