コード例 #1
0
ファイル: data.py プロジェクト: CompRhys/aviary
    def __getitem__(self, idx: int):
        """Get an entry out of the Dataset.

        Args:
            idx (int): positional index of the entry (row of ``self.df``)

        Returns:
            tuple: containing
            - tuple[Tensor, Tensor, LongTensor, LongTensor]: Roost model inputs
              (element weights, element features, self indices, neighbour
              indices)
            - list[Tensor | LongTensor]: one target per regression or
              classification task in ``self.task_dict``
            - the identifiers (like material_id, composition) read from the
              ``self.identifiers`` columns, unpacked as trailing elements of
              the returned tuple rather than wrapped in a single list

        Raises:
            AssertionError: if building the element features raises an
                AssertionError; re-raised with the entry's id and composition.
            ValueError: if building the element features raises a ValueError;
                re-raised with the entry's id and composition.
        """
        row = self.df.iloc[idx]
        composition = row[self.inputs]
        cry_ids = row[self.identifiers].to_list()

        comp_dict = Composition(composition).get_el_amt_dict()
        elements = list(comp_dict.keys())

        # Column vector (n_elements, 1) of amounts normalised to sum to one.
        weights = list(comp_dict.values())
        weights = np.atleast_2d(weights).T / np.sum(weights)

        # NOTE(review): if self.elem_features is a plain dict, an unknown
        # element raises KeyError, which is not translated here - confirm.
        try:
            elem_fea = np.vstack(
                [self.elem_features[element] for element in elements])
        except AssertionError as err:
            # Chain the original error so the root cause stays in the traceback.
            raise AssertionError(
                f"cry-id {cry_ids[0]} [{composition}] contains element types not in embedding"
            ) from err
        except ValueError as err:
            # np.vstack raises ValueError e.g. when there are no elements to stack.
            raise ValueError(
                f"cry-id {cry_ids[0]} [{composition}] composition cannot be parsed into elements"
            ) from err

        # Enumerate every ordered pair (i, j) of elements, including i == j:
        # self_idx holds i (each index repeated nele times), nbr_idx holds j.
        nele = len(elements)
        self_idx = [i for i in range(nele) for _ in range(nele)]
        nbr_idx = list(range(nele)) * nele

        # convert all data to tensors
        elem_weights = Tensor(weights)
        elem_fea = Tensor(elem_fea)
        self_idx = LongTensor(self_idx)
        nbr_idx = LongTensor(nbr_idx)

        # Tasks that are neither "regression" nor "classification" contribute
        # no target.
        targets = []
        for target in self.task_dict:
            task = self.task_dict[target]
            if task == "regression":
                targets.append(Tensor([row[target]]))
            elif task == "classification":
                targets.append(LongTensor([row[target]]))

        return (
            (elem_weights, elem_fea, self_idx, nbr_idx),
            targets,
            *cry_ids,
        )
コード例 #2
0
def _get_framework(formula, ignored_species) -> str:
    """
    Return the reduced formula of the entry without any of the ignored species.

    Return 'ignored' if all the atoms are ignored, i.e. every species key of
    the parsed composition is contained in ``ignored_species``. This includes
    the case where ``ignored_species`` lists additional species that do not
    occur in the formula.

    Args:
        formula: anything accepted by ``Composition(...)``.
        ignored_species: iterable of species to drop; entries are compared
            against the keys of ``Composition(formula).as_dict()``
            (presumably species/element symbol strings - confirm with callers).

    Returns:
        str: the reduced formula of the remaining species, or ``"ignored"``.
    """
    amounts = Composition(formula).as_dict()
    # Materialise once so a one-shot iterable is not exhausted by the check.
    ignored = set(ignored_species)
    # Subset test rather than strict equality: nothing would be left over
    # whenever every species present is in the ignored set.
    if ignored.issuperset(amounts):
        return "ignored"
    remaining = {sp: amt for sp, amt in amounts.items() if sp not in ignored}
    return Composition.from_dict(remaining).reduced_formula