def fit(self, X, y=None, **kwargs):
    """
    Select the two feature columns to plot from X and draw them.

    The fit method is the primary drawing input for the scatter
    visualization since it has both the X and y data required for the viz
    and the transform method does not.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances. It must either have exactly two columns,
        or ``self.features_`` must identify the two columns to plot:
        column labels for a DataFrame, field names for a structured array,
        or integer column positions for a plain ndarray.

    y : ndarray or Series of length n
        An array or series of target or class values

    kwargs : dict
        Pass generic arguments to the drawing method

    Returns
    -------
    self : instance
        Returns the instance of the transformer/visualizer

    Raises
    ------
    YellowbrickValueError
        If X does not have exactly two columns and two columns cannot be
        selected through ``self.features_``.
    """
    # X.shape must unpack into exactly two values, i.e. X is two-dimensional.
    _, ncols = X.shape

    if ncols == 2:
        X_two_cols = X
        # Handle the feature names if they're None.
        if self.features_ is None:
            self.features_ = ["Feature One", "Feature Two"]

    elif self.features_ is not None and is_dataframe(X):
        # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values``
        # returns the same ndarray on old and new pandas alike.
        X_two_cols = X[self.features_].values

    # handle numpy named/ structured array
    elif self.features_ is not None and is_structured_array(X):
        # Read each field on its own and stack them on a new trailing axis.
        # Re-viewing a multi-field selection as float64 depends on the
        # memory layout: since NumPy 1.16 such a selection keeps padding
        # for the unselected fields, so the view raises or is mis-shaped.
        X_two_cols = np.stack(
            [np.asarray(X[name], dtype=np.float64) for name in self.features_],
            axis=-1,
        )

    # handle features that are numeric columns in ndarray matrix
    elif self.features_ is not None and has_ndarray_int_columns(
        self.features_, X
    ):
        f_one, f_two = self.features_
        X_two_cols = X[:, [int(f_one), int(f_two)]]

    else:
        raise YellowbrickValueError(""" ScatterVisualizer only accepts two features, please explicitly set these two features in the init kwargs or pass a matrix/ dataframe in with only two columns.""")

    # Store the classes for the legend if they're None.
    if self.classes_ is None:
        # TODO: Is this the most efficient method?
        self.classes_ = [str(label) for label in np.unique(y)]

    # Draw the instances
    self.draw(X_two_cols, y, **kwargs)

    # Fit always returns self.
    return self
def fit(self, X, y=None, **kwargs):
    """
    Pick the two feature columns out of X and hand them to ``draw``.

    The fit method is the primary drawing input for the scatter
    visualization since it has both the X and y data required for the viz
    and the transform method does not.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances. Either it has exactly two columns, or
        ``self.features_`` names the two columns to plot: column labels
        for a DataFrame, field names for a structured array, or integer
        column positions for a plain ndarray.

    y : ndarray or Series of length n
        An array or series of target or class values

    kwargs : dict
        Pass generic arguments to the drawing method

    Returns
    -------
    self : instance
        Returns the instance of the transformer/visualizer

    Raises
    ------
    YellowbrickValueError
        If X does not have exactly two columns and two columns cannot be
        selected through ``self.features_``.
    """
    # X.shape must unpack into exactly two values, i.e. X is two-dimensional.
    _, ncols = X.shape

    if ncols == 2:
        X_two_cols = X
        # Handle the feature names if they're None.
        if self.features_ is None:
            self.features_ = ["Feature One", "Feature Two"]

    elif self.features_ is not None and is_dataframe(X):
        # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values``
        # returns the same ndarray on old and new pandas alike.
        X_two_cols = X[self.features_].values

    # handle numpy named/ structured array
    elif self.features_ is not None and is_structured_array(X):
        # Copying a multi-field selection keeps its dtype, padding for the
        # unselected fields included (NumPy >= 1.16), so re-viewing the
        # copy as float64 is still layout-dependent. Reading each field on
        # its own and stacking on a new trailing axis is not.
        X_two_cols = np.stack(
            [np.asarray(X[name], dtype=np.float64) for name in self.features_],
            axis=-1,
        )

    # handle features that are numeric columns in ndarray matrix
    elif self.features_ is not None and has_ndarray_int_columns(
        self.features_, X
    ):
        f_one, f_two = self.features_
        X_two_cols = X[:, [int(f_one), int(f_two)]]

    else:
        raise YellowbrickValueError(""" ScatterVisualizer only accepts two features, please explicitly set these two features in the init kwargs or pass a matrix/ dataframe in with only two columns.""")

    # Store the classes for the legend if they're None.
    if self.classes_ is None:
        # TODO: Is this the most efficient method?
        self.classes_ = [str(label) for label in np.unique(y)]

    # Draw the instances
    self.draw(X_two_cols, y, **kwargs)

    # Fit always returns self.
    return self
def _select_feature_columns(self, X):
    """
    Return the two columns of X that the visualizer should plot.

    Parameters
    ----------
    X : ndarray, structured ndarray or DataFrame
        The data to select from. Either it has exactly two columns, or
        ``self.features_`` names the two columns to use: column labels
        for a DataFrame, field names for a structured array, or integer
        column positions for a plain ndarray.

    Returns
    -------
    X_two_cols : ndarray, or X itself
        X unchanged when it already has two columns, otherwise the
        selected columns as an ndarray.

    Raises
    ------
    YellowbrickValueError
        If X does not have exactly two columns and two columns cannot be
        selected through ``self.features_``.

    Notes
    -----
    When X has two columns and ``self.features_`` is None, it is set to
    the default names "Feature One" and "Feature Two".
    """
    # One-dimensional input has no column axis; count its columns by
    # reinterpreting each row as a run of float64 values.
    if len(X.shape) == 1:
        X_flat = X.view(np.float64).reshape(len(X), -1)
    else:
        X_flat = X

    _, ncols = X_flat.shape

    if ncols == 2:
        # NOTE(review): X itself (not X_flat) is returned here, so
        # one-dimensional input with two columns is passed through as-is.
        X_two_cols = X
        # Handle the feature names if they're None.
        if self.features_ is None:
            self.features_ = ["Feature One", "Feature Two"]

    elif self.features_ is not None and is_dataframe(X):
        # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values``
        # returns the same ndarray on old and new pandas alike.
        X_two_cols = X[self.features_].values

    # handle numpy named/ structured array
    elif self.features_ is not None and is_structured_array(X):
        # Read each field on its own instead of re-viewing the multi-field
        # selection as float64: since NumPy 1.16 such a selection keeps
        # padding for the unselected fields, so the view is layout-dependent.
        X_selected = np.stack(
            [np.asarray(X[name], dtype=np.float64) for name in self.features_],
            axis=-1,
        )
        X_two_cols = X_selected.reshape(len(X_selected), -1)

    # handle features that are numeric columns in ndarray matrix
    elif self.features_ is not None and has_ndarray_int_columns(
        self.features_, X
    ):
        f_one, f_two = self.features_
        X_two_cols = X[:, [int(f_one), int(f_two)]]

    else:
        raise YellowbrickValueError(""" ScatterVisualizer only accepts two features, please explicitly set these two features in the init kwargs or pass a matrix/ dataframe in with only two columns.""")

    return X_two_cols
def _select_feature_columns(self, X):
    """
    Extract the pair of columns from X that will be plotted.

    Parameters
    ----------
    X : ndarray, structured ndarray or DataFrame
        The data to select from. Either it has exactly two columns, or
        ``self.features_`` names the two columns to use: column labels
        for a DataFrame, field names for a structured array, or integer
        column positions for a plain ndarray.

    Returns
    -------
    X_two_cols : ndarray, or X itself
        X unchanged when it already has two columns, otherwise the
        selected columns as an ndarray.

    Raises
    ------
    YellowbrickValueError
        If X does not have exactly two columns and two columns cannot be
        selected through ``self.features_``.

    Notes
    -----
    When X has two columns and ``self.features_`` is None, it is set to
    the default names "Feature One" and "Feature Two".
    """
    # One-dimensional input has no column axis; count its columns by
    # reinterpreting a copy of each row as a run of float64 values.
    if len(X.shape) == 1:
        X_flat = X.copy().view(np.float64).reshape(len(X), -1)
    else:
        X_flat = X

    _, ncols = X_flat.shape

    if ncols == 2:
        # NOTE(review): X itself (not X_flat) is returned here, so
        # one-dimensional input with two columns is passed through as-is.
        X_two_cols = X
        # Handle the feature names if they're None.
        if self.features_ is None:
            self.features_ = ["Feature One", "Feature Two"]

    elif self.features_ is not None and is_dataframe(X):
        # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values``
        # returns the same ndarray on old and new pandas alike.
        X_two_cols = X[self.features_].values

    # handle numpy named/ structured array
    elif self.features_ is not None and is_structured_array(X):
        # Copying a multi-field selection keeps its dtype, padding for the
        # unselected fields included (NumPy >= 1.16), so viewing the copy
        # as float64 would yield extra columns. Read each field on its own
        # and stack them instead.
        X_selected = np.stack(
            [np.asarray(X[name], dtype=np.float64) for name in self.features_],
            axis=-1,
        )
        X_two_cols = X_selected.reshape(len(X_selected), -1)

    # handle features that are numeric columns in ndarray matrix
    elif self.features_ is not None and has_ndarray_int_columns(
        self.features_, X
    ):
        f_one, f_two = self.features_
        X_two_cols = X[:, [int(f_one), int(f_two)]]

    else:
        raise YellowbrickValueError(""" ScatterVisualizer only accepts two features, please explicitly set these two features in the init kwargs or pass a matrix/ dataframe in with only two columns.""")

    return X_two_cols