def test_structure_to_composition(self):
    """Check StructureToComposition with and without formula reduction.

    A two-site Si structure is featurized twice on the same dataframe:
    once with default settings (expects ``Si2`` in the "composition"
    column) and once with ``reduce=True`` writing to "composition_red"
    (expects ``Si``).
    """
    si_lattice = Lattice(
        [
            [3.8401979337, 0.00, 0.00],
            [1.9200989668, 3.3257101909, 0.00],
            [0.00, -2.2171384943, 3.1355090603],
        ]
    )
    site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    si_struct = Structure(si_lattice, ["Si", "Si"], site_coords)
    frame = DataFrame(data={'structure': [si_struct]})

    # Default featurizer: the full (unreduced) composition is expected.
    default_stc = StructureToComposition()
    frame = default_stc.featurize_dataframe(frame, 'structure')
    full_compositions = frame["composition"].tolist()
    self.assertEqual(full_compositions[0], Composition("Si2"))

    # Reduced featurizer writes to its own column on the same dataframe.
    reduced_stc = StructureToComposition(
        reduce=True,
        target_col_id='composition_red',
    )
    frame = reduced_stc.featurize_dataframe(frame, 'structure')
    reduced_compositions = frame["composition_red"].tolist()
    self.assertEqual(reduced_compositions[0], Composition("Si"))
def _add_composition_from_structure(self, df, overwrite=True):
    """
    Derive a composition column from the structure column when appropriate.

    Compositions are only generated when all of the following hold:
        1. the structure column is present in the dataframe,
        2. the composition column is absent, or ``overwrite`` is set,
        3. the composition column is a key/member of ``self.featurizers``
           (i.e. composition features are actually wanted).

    Otherwise the dataframe is returned untouched.

    Args:
        df (pandas.DataFrame): May or may not contain a composition column.
        overwrite (bool): Whether an existing composition column should be
            replaced by compositions computed from the structures.

    Returns:
        df (pandas.DataFrame): The input dataframe, with the composition
            column added/overwritten if the conditions above were met.
    """
    # Guard clauses, checked in the same order as the conditions above.
    if self.structure_col not in df.columns:
        return df

    has_composition = self.composition_col in df.columns
    if has_composition and not overwrite:
        return df

    if self.composition_col not in self.featurizers:
        return df

    if has_composition:
        message = (
            "composition column already exists, "
            "overwriting with composition from structure."
        )
    else:
        message = "Adding compositions from structures."
    logger.info(self._log_prefix + message)

    # Per the original note: tidying the structure column adds oxidation
    # states, which are then carried over into the derived compositions.
    df = self._tidy_column(df, self.structure_col)

    converter = StructureToComposition(
        reduce=True,
        target_col_id=self.composition_col,
        overwrite_data=overwrite,
    )
    converter.set_n_jobs(self.n_jobs)
    return converter.featurize_dataframe(df, self.structure_col)