def indicator_matrix(df):
    """Return the indicator matrix of the original dataframe.

    Non-numeric columns are one-hot encoded by ``pd.get_dummies``. Every
    numeric column of ``df`` is then overwritten in the result with
    ``util.rescale(column, new_min=0, new_max=1)``.

    Args:
        df: The input ``pandas.DataFrame``.

    Returns:
        A new dataframe holding the dummy columns plus the rescaled numeric
        columns. ``df`` itself is not modified.
    """
    matrix = pd.get_dummies(df)
    # Select by numeric kind instead of the exact dtype names 'int64' and
    # 'float64', so that int32, float32, unsigned, ... columns are rescaled
    # as well. Boolean columns are not matched by 'number' and therefore stay
    # untouched, as they did before.
    for column in df.select_dtypes(include='number').columns:
        matrix[column] = util.rescale(df[column], new_min=0, new_max=1)
    return matrix
def indicator_matrix(df):
    """Return the indicator matrix of the original dataframe.

    ``pd.get_dummies`` one-hot encodes the non-numeric columns. Each numeric
    column of ``df`` is then replaced in the result by
    ``util.rescale(column, new_min=0, new_max=1)``.

    Args:
        df: The input ``pandas.DataFrame``.

    Returns:
        A new dataframe with the dummy columns and the rescaled numeric
        columns. The input ``df`` is left unchanged.
    """
    encoded = pd.get_dummies(df)
    # Comparing the dtype against ('int64', 'float64') skipped every other
    # numeric width (int32, float32, uint8, ...), leaving those columns
    # unscaled. 'number' covers all of them and still excludes booleans.
    numeric_columns = df.select_dtypes(include='number').columns
    for column in numeric_columns:
        encoded[column] = util.rescale(df[column], new_min=0, new_max=1)
    return encoded
def _build_indicator_matrix(self):
    """Assemble the indicator matrix for ``self.X``.

    ``pd.get_dummies`` puts a `1` where a row takes a value for a variable
    and a `0` where it does not. Each column of ``self.numerical_variables``
    is then written into the result after going through
    ``util.rescale(..., new_min=0, new_max=1)``.

    Returns:
        The dummy-encoded dataframe with the rescaled numerical columns.
    """
    dummies = pd.get_dummies(self.X)

    # Numerical variables are stored under their original column name,
    # replacing whatever ``get_dummies`` produced for that name.
    for name in self.numerical_variables.columns:
        rescaled = util.rescale(
            self.numerical_variables[name],
            new_min=0,
            new_max=1,
        )
        dummies[name] = rescaled

    return dummies
def _compute(self):
    """Run the analysis and store every result on the instance.

    Steps, in order:

    1. One-hot encode ``self.df`` into ``self.indicator_matrix`` and overwrite
       each column of ``self._numerical`` with ``util.rescale`` of that column
       (called with its default bounds).
    2. Fit ``prince.CA`` on the indicator matrix with ``self.components``.
    3. Copy the projections and the ``U``/``W``/``V`` factors from the fit.
    4. Derive ``self.eigenvalues`` from ``W`` and call
       ``self._compute_explained_variance()``.
    """
    # Step 1: indicator matrix, numerical columns rescaled in place
    self.indicator_matrix = pd.get_dummies(self.df)
    for name in self._numerical.columns:
        rescaled = util.rescale(self._numerical[name])
        self.indicator_matrix[name] = rescaled

    # Step 2: correspondence analysis of the indicator matrix
    analysis = prince.CA(self.indicator_matrix, components=self.components)

    # Step 3: keep the pieces needed later
    self.column_projections = analysis.column_projections
    self.row_projections = analysis.row_projections
    self.U = analysis.U
    self.W = analysis.W
    self.V = analysis.V

    # Step 4: Benzécri scores instead of classical eigenvalues.
    # NOTE(review): the usual Benzécri correction is applied to values
    # *above* 1/Q; confirm that the `<` comparison and the additional
    # squaring below are intended.
    scores = []
    for w in self.W:
        if w < 1 / self.Q:
            score = ((self.Q / (self.Q - 1)) ** 2 * (w - 1 / self.Q) ** 2) ** 2
        else:
            score = w ** 2
        scores.append(score)
    self.eigenvalues = scores

    self._compute_explained_variance()