def draw(self, points, target=None, **kwargs):
    """
    Called from the fit method, this method creates the canvas and
    draws the plot on it.

    Parameters
    ----------
    points : iterable of 2-item sequences
        The (x, y) coordinates to scatter.

    target : iterable or None, default: None
        The class of each point, consumed in lockstep with ``points``.
        If None, every point is drawn as a single series belonging to
        the first entry of ``self.classes_``.

    kwargs: generic keyword arguments.

    Returns
    -------
    ax : the axes the plot was drawn on (``self.ax``).

    Raises
    ------
    YellowbrickValueError
        If the number of labels differs from the number of classes.
    """
    # Resolve the labels with the classes
    labels = self.labels if self.labels is not None else self.classes_
    if len(labels) != len(self.classes_):
        raise YellowbrickValueError(
            (
                "number of supplied labels ({}) does not "
                "match the number of classes ({})"
            ).format(len(labels), len(self.classes_))
        )

    # Create the color mapping for the labels.
    color_values = resolve_colors(
        n_colors=len(labels), colormap=self.colormap, colors=self.color
    )
    colors = dict(zip(labels, color_values))

    # Transform labels into a map of class to label
    labels = dict(zip(self.classes_, labels))

    # Define boundaries with a vertical line
    if self.annotate_docs:
        for xcoords in self.boundaries_:
            self.ax.axvline(x=xcoords, color="lightgray", linestyle="dashed")

    series = defaultdict(lambda: {"x": [], "y": []})

    if target is not None:
        for point, t in zip(points, target):
            label = labels[t]
            series[label]["x"].append(point[0])
            series[label]["y"].append(point[1])
    else:
        # ``colors`` is keyed by display label, not by class, so the class
        # must be translated here exactly as in the branch above; using the
        # raw class raised KeyError whenever custom labels were supplied.
        label = labels[self.classes_[0]]
        for x, y in points:
            series[label]["x"].append(x)
            series[label]["y"].append(y)

    # Distinct loop name so the ``points`` argument is not shadowed
    for label, coords in series.items():
        self.ax.scatter(
            coords["x"],
            coords["y"],
            marker="|",
            c=colors[label],
            zorder=100,
            label=label,
        )

    # One y tick per indexed word
    self.ax.set_yticks(list(range(len(self.indexed_words_))))
    self.ax.set_yticklabels(self.indexed_words_)

    return self.ax
def draw(self):
    """
    Renders the class balance chart on the specified axes from support.

    In balance mode a single bar is drawn per entry of ``self.support_``.
    Otherwise (compare mode) each entry of ``self.support_`` is drawn as
    its own group of bars, the second and later groups shifted right by
    one bar width.

    Returns
    -------
    ax : the axes the chart was drawn on (``self.ax``).
    """
    # Number of colors is either number of classes or 2
    palette = resolve_colors(
        len(self.support_), colormap=self.colormap, colors=self.colors
    )

    if self._mode == BALANCE:
        positions = np.arange(len(self.support_))
        self.ax.bar(
            positions, self.support_, color=palette, align="center", width=0.5
        )
        return self.ax

    # Compare mode
    bar_width = 0.35
    group_names = ["train", "test"]

    for group, counts in enumerate(self.support_):
        positions = np.arange(len(self.classes_))
        if group > 0:
            positions = positions + bar_width

        self.ax.bar(
            positions,
            counts,
            bar_width,
            color=palette[group],
            label=group_names[group],
        )

    return self.ax
def generate_tsne(title, X, labels):
    """
    Fit a TSNEVisualizer on ``X``/``labels`` and save the plot to disk.

    The figure is written to ``os.path.join(OUTPUT, title)``, so ``title``
    is used both as the axes title and as the output file name.

    Parameters
    ----------
    title : str
        Title of the plot and name of the saved file.

    X : the data passed to ``TSNEVisualizer.fit``.

    labels : the targets passed to ``TSNEVisualizer.fit``.
    """
    fig, ax1 = plt.subplots(1, 1, figsize=(4, 2))
    title_dic = {"fontsize": 7, "fontweight": "bold"}

    # Two palettes are concatenated to provide 21 colors in total
    colors = resolve_colors(11, "Spectral_r")
    colors2 = resolve_colors(10, "BrBG_r")

    try:
        tsne = TSNEVisualizer(ax1, colors=colors + colors2, decompose=None)
        tsne.fit(X, labels)
        tsne.finalize()
        tsne.ax.set_title(title, title_dic)

        # Save this figure explicitly rather than whatever pyplot considers
        # the "current" figure at the time of the call.
        fig.savefig(os.path.join(OUTPUT, title))
    finally:
        # Release the figure; otherwise every call leaves one more figure
        # open in pyplot's global registry.
        plt.close(fig)
def draw(self):
    """
    Draws the cv scores as a line chart on the current axes.

    For every metric in ``METRICS`` that is present in ``self.cv_scores_``
    this plots the metric against ``self.thresholds_``, shades the band
    between its ``<metric>_lower`` and ``<metric>_upper`` scores, and, for
    the metric selected by ``self._check_argmax``, draws a dashed vertical
    line at the threshold where that metric is maximal.

    Returns
    -------
    ax : the axes the chart was drawn on (``self.ax``).
    """
    # Set the colors from the supplied values or reasonable defaults
    color_values = resolve_colors(n_colors=4, colors=self.color)

    # Get the metric used to annotate the graph with its maximizing value
    argmax = self._check_argmax(self.argmax, self.exclude)

    for idx, metric in enumerate(METRICS):
        # Skip any excluded labels
        if metric not in self.cv_scores_:
            continue

        # Get the color ensuring every metric has a static color
        color = color_values[idx]

        # Make the label pretty
        if metric == "fscore":
            if self.fbeta == 1.0:
                label = "$f_1$"
            else:
                # Raw string: in a normal literal "\b" is a backspace, which
                # corrupted "\beta". The closing "$" is required for the
                # label to be parsed as math text.
                label = r"$f_{{\beta={:0.1f}}}$".format(self.fbeta)
        else:
            label = metric.replace("_", " ")

        # Draw the metric values
        self.ax.plot(
            self.thresholds_, self.cv_scores_[metric], color=color, label=label
        )

        # Draw the upper and lower bounds
        lower = self.cv_scores_["{}_lower".format(metric)]
        upper = self.cv_scores_["{}_upper".format(metric)]

        self.ax.fill_between(
            self.thresholds_, upper, lower, alpha=0.35, linewidth=0, color=color
        )

        # Annotate the graph with the maximizing value
        if argmax and argmax == metric:
            # Separate name so the selected metric is not overwritten by
            # the integer position of its maximum.
            best_idx = self.cv_scores_[metric].argmax()
            threshold = self.thresholds_[best_idx]
            self.ax.axvline(
                threshold,
                ls="--",
                c="k",
                lw=1,
                label="$t_{}={:0.2f}$".format(metric[0], threshold),
            )

    return self.ax
def draw(self, **kwargs):
    """
    Draws the feature importances as a bar chart; called from fit.

    Returns
    -------
    ax : the axes the chart was drawn on (``self.ax``).

    Raises
    ------
    NotFitted
        If ``feature_importances_`` or ``features_`` has not been set.
    """
    # Quick validation: report the first missing fitted attribute
    required = ("feature_importances_", "features_")
    missing = next((name for name in required if not hasattr(self, name)), None)
    if missing is not None:
        raise NotFitted("missing required param '{}'".format(missing))

    # Bars are centered half a unit above each integer position
    n_features = self.features_.shape[0]
    pos = np.arange(n_features) + 0.5

    if not self.stack:
        # One color per feature for the simple horizontal bar chart
        bar_colors = resolve_colors(
            len(self.features_), colormap=self.colormap, colors=self.colors
        )
        self.ax.barh(
            pos, self.feature_importances_, color=bar_colors, align="center"
        )
    else:
        # One color per class; the legend is anchored outside the axes
        class_colors = resolve_colors(len(self.classes_), colormap=self.colormap)
        legend_kws = {"bbox_to_anchor": (1.04, 0.5), "loc": "center left"}
        bar_stack(
            self.feature_importances_,
            ax=self.ax,
            labels=list(self.classes_),
            ticks=self.features_,
            orientation="h",
            colors=class_colors,
            legend_kws=legend_kws,
        )

    # Set the labels for the bars
    self.ax.set_yticks(pos)
    self.ax.set_yticklabels(self.features_)

    return self.ax
def draw(self, points, target=None, **kwargs):
    """
    Called from the fit method, this method draws the TSNE scatter plot,
    from a set of decomposed points in 2 dimensions. This method also
    accepts a third dimension, target, which is used to specify the colors
    of each of the points. If the target is not specified, then the points
    are plotted as a single cloud to show similar documents.

    Parameters
    ----------
    points : iterable of 2-item sequences
        The (x, y) coordinates to scatter.

    target : iterable or None, default: None
        The class of each point, consumed in lockstep with ``points``.

    kwargs: generic keyword arguments.

    Returns
    -------
    ax : the axes the plot was drawn on (``self.ax``).

    Raises
    ------
    YellowbrickValueError
        If the number of labels differs from the number of classes.
    """
    # Resolve the labels with the classes
    labels = self.labels if self.labels is not None else self.classes_
    if len(labels) != len(self.classes_):
        raise YellowbrickValueError(
            (
                "number of supplied labels ({}) does not "
                "match the number of classes ({})"
            ).format(len(labels), len(self.classes_))
        )

    # Create the color mapping for the labels.
    self.color_values_ = resolve_colors(
        n_colors=len(labels), colormap=self.colormap, colors=self.colors
    )
    colors = dict(zip(labels, self.color_values_))

    # Transform labels into a map of class to label
    labels = dict(zip(self.classes_, labels))

    # Expand the points into vectors of x and y for scatter plotting,
    # assigning them to their label if the label has been passed in.
    # Additionally, filter classes not specified directly by the user.
    series = defaultdict(lambda: {"x": [], "y": []})

    if target is not None:
        for t, point in zip(target, points):
            label = labels[t]
            series[label]["x"].append(point[0])
            series[label]["y"].append(point[1])
    else:
        # ``colors`` is keyed by display label, not by class, so translate
        # the class first; using the raw class raised KeyError whenever
        # custom labels were supplied without a target.
        label = labels[self.classes_[0]]
        for x, y in points:
            series[label]["x"].append(x)
            series[label]["y"].append(y)

    # Plot the points (loop name chosen so ``points`` is not shadowed)
    for label, coords in series.items():
        self.ax.scatter(
            coords["x"],
            coords["y"],
            c=colors[label],
            alpha=self.alpha,
            label=label,
        )

    return self.ax
def draw(self):
    """
    Draws the cv scores as a line chart on the current axes.

    For every metric in ``METRICS`` that is present in ``self.cv_scores_``
    this plots the metric against ``self.thresholds_``, shades the band
    between its ``<metric>_lower`` and ``<metric>_upper`` scores, and, for
    the metric named by ``self.argmax`` (case-insensitive), draws a dashed
    vertical line at the threshold where that metric is maximal. When
    ``self.argmax`` is None no annotation is drawn.

    Returns
    -------
    ax : the axes the chart was drawn on (``self.ax``).
    """
    # Set the colors from the supplied values or reasonable defaults
    color_values = resolve_colors(n_colors=4, colors=self.color)

    # Normalize once; guard against None so that disabling the annotation
    # does not raise AttributeError on ``.lower()``.
    argmax_metric = self.argmax.lower() if self.argmax is not None else None

    for idx, metric in enumerate(METRICS):
        # Skip any excluded labels
        if metric not in self.cv_scores_:
            continue

        # Get the color ensuring every metric has a static color
        color = color_values[idx]

        # Make the label pretty
        if metric == "fscore":
            if self.fbeta == 1.0:
                label = "$f_1$"
            else:
                # Raw string: in a normal literal "\b" is a backspace, which
                # corrupted "\beta". The closing "$" is required for the
                # label to be parsed as math text.
                label = r"$f_{{\beta={:0.1f}}}$".format(self.fbeta)
        else:
            label = metric.replace("_", " ")

        # Draw the metric values
        self.ax.plot(
            self.thresholds_, self.cv_scores_[metric], color=color, label=label
        )

        # Draw the upper and lower bounds
        lower = self.cv_scores_["{}_lower".format(metric)]
        upper = self.cv_scores_["{}_upper".format(metric)]

        self.ax.fill_between(
            self.thresholds_, upper, lower, alpha=0.35, linewidth=0, color=color
        )

        # Annotate the graph with the maximizing value
        if argmax_metric == metric:
            best_idx = self.cv_scores_[metric].argmax()
            threshold = self.thresholds_[best_idx]
            self.ax.axvline(
                threshold,
                ls="--",
                c="k",
                lw=1,
                label="$t_{}={:0.2f}$".format(metric[0], threshold),
            )

    return self.ax
def draw(self, points, target=None, **kwargs):
    """
    Called from the fit method, this method creates the canvas and
    draws the plot on it.

    Parameters
    ----------
    points : iterable of 2-item sequences
        The (x, y) coordinates to scatter.

    target : iterable or None, default: None
        The class of each point, consumed in lockstep with ``points``.
        If None, every point is drawn as a single series belonging to
        the first entry of ``self.classes_``.

    kwargs: generic keyword arguments.

    Raises
    ------
    YellowbrickValueError
        If the number of labels differs from the number of classes.
    """
    # Resolve the labels with the classes
    labels = self.labels if self.labels is not None else self.classes_
    if len(labels) != len(self.classes_):
        raise YellowbrickValueError((
            "number of supplied labels ({}) does not "
            "match the number of classes ({})"
        ).format(len(labels), len(self.classes_)))

    # Create the color mapping for the labels.
    color_values = resolve_colors(
        n_colors=len(labels), colormap=self.colormap, colors=self.color)
    colors = dict(zip(labels, color_values))

    # Transform labels into a map of class to label
    labels = dict(zip(self.classes_, labels))

    # Define boundaries with a vertical line
    if self.annotate_docs:
        for xcoords in self.boundaries_:
            self.ax.axvline(x=xcoords, color='lightgray', linestyle='dashed')

    series = defaultdict(lambda: {'x': [], 'y': []})

    if target is not None:
        for point, t in zip(points, target):
            label = labels[t]
            series[label]['x'].append(point[0])
            series[label]['y'].append(point[1])
    else:
        # ``colors`` is keyed by display label, not by class, so translate
        # the class first; using the raw class raised KeyError whenever
        # custom labels were supplied without a target.
        label = labels[self.classes_[0]]
        for x, y in points:
            series[label]['x'].append(x)
            series[label]['y'].append(y)

    # Distinct loop name so the ``points`` argument is not shadowed
    for label, coords in series.items():
        self.ax.scatter(coords['x'], coords['y'], marker='|',
                        c=colors[label], zorder=100, label=label)

    # One y tick per indexed word
    self.ax.set_yticks(list(range(len(self.indexed_words_))))
    self.ax.set_yticklabels(self.indexed_words_)
def draw(self, *args, **kwargs):
    """
    Renders the visualization

    Each entry of ``self.plot_data`` is resampled onto 101 evenly spaced
    thresholds in [0, 1]; for precision, recall and queue rate the
    quantiles given by ``self.quantiles`` are then computed across the
    entries and drawn as a median line with a shaded band.

    Parameters
    ----------
    args : tuple
        Accepted for backward compatibility and ignored.

    kwargs: dict
        keyword arguments passed to Scikit-Learn API. They are accepted
        here but not used by the drawing code.

    Returns
    -------
    self.ax : AxesSubplot of the visualizer
        Returns the AxesSubplot instance of the visualizer
    """
    # Set the colors from the supplied values or reasonable defaults
    color_values = resolve_colors(n_colors=3, colors=self.color)

    uniform_thresholds = np.linspace(0, 1, num=101)
    uniform_precision_plots = []
    uniform_recall_plots = []
    uniform_queue_rate_plots = []

    for data in self.plot_data:
        # Position in this trial's thresholds of each uniform threshold
        indices = [
            bisect.bisect_left(data['thresholds'], ut)
            for ut in uniform_thresholds
        ]
        uniform_precision_plots.append(
            [data['precision'][index] for index in indices]
        )
        uniform_recall_plots.append(
            [data['recall'][index] for index in indices]
        )
        uniform_queue_rate_plots.append(
            [data['queue_rate'][index] for index in indices]
        )

    uplots = (
        uniform_precision_plots,
        uniform_recall_plots,
        uniform_queue_rate_plots,
    )

    for uniform_plot, color in zip(uplots, color_values):
        # Compute the lower, median, and upper plots
        lower, median, upper = mstats.mquantiles(
            uniform_plot, prob=self.quantiles, axis=0
        )

        # Draw the median line
        self.ax.plot(uniform_thresholds, median, color=color)

        # Draw the fill between the lower and upper bounds
        self.ax.fill_between(
            uniform_thresholds, upper, lower,
            alpha=0.5, linewidth=0, color=color
        )

    return self.ax
def draw(self, X, y, **kwargs):
    """Called from the fit method, this method creates a scatter plot that
    draws each instance as a class or target colored point, whose location
    is determined by the feature data set.

    Parameters
    ----------
    X : iterable of rows
        Each row supplies the coordinates of one instance.

    y : indexable
        ``y[i]`` is used as an index into ``self.classes_`` for row ``i``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.scatter`` call.
    """
    # Set the axes limits
    self.ax.set_xlim([-1, 1])
    self.ax.set_ylim([-1, 1])

    # set the colors
    if self.colormap is not None or self.color is not None:
        # Keyword names follow the ``resolve_colors`` signature used by the
        # other visualizers (``n_colors``/``colors``); the previous
        # ``num_colors``/``color`` spelling does not match it.
        color_values = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color)
    else:
        color_values = get_color_cycle()
    colors = dict(zip(self.classes_, color_values))

    # Create a data structure to hold the scatter plot representations
    to_plot = {}
    for kls in self.classes_:
        to_plot[kls] = [[], []]

    # Add each row of the data set to to_plot for plotting
    # TODO: make this an independent function for override
    for i, row in enumerate(X):
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        x_, y_ = row_[0], row_[1]
        kls = self.classes_[y[i]]

        to_plot[kls][0].append(x_)
        to_plot[kls][1].append(y_)

    # Add the scatter plots from the to_plot function
    # TODO: store these plots to add more instances to later
    # TODO: make this a separate function
    for kls in self.classes_:
        self.ax.scatter(
            to_plot[kls][0],
            to_plot[kls][1],
            marker=next(self.markers),
            color=colors[kls],
            label=str(kls),
            **kwargs)

    self.ax.axis('equal')
def draw(self, points, target=None, **kwargs):
    """
    Called from the fit method, this method draws the TSNE scatter plot,
    from a set of decomposed points in 2 dimensions. This method also
    accepts a third dimension, target, which is used to specify the colors
    of each of the points. If the target is not specified, then the points
    are plotted as a single cloud to show similar documents.

    Parameters
    ----------
    points : iterable of 2-item sequences
        The (x, y) coordinates to scatter.

    target : iterable or None, default: None
        The class of each point, consumed in lockstep with ``points``.

    kwargs: generic keyword arguments.

    Raises
    ------
    YellowbrickValueError
        If the number of labels differs from the number of classes.
    """
    # Resolve the labels with the classes
    labels = self.labels if self.labels is not None else self.classes_
    if len(labels) != len(self.classes_):
        raise YellowbrickValueError((
            "number of supplied labels ({}) does not "
            "match the number of classes ({})"
        ).format(len(labels), len(self.classes_)))

    # Create the color mapping for the labels.
    self.color_values_ = resolve_colors(
        n_colors=len(labels), colormap=self.colormap, colors=self.color)
    colors = dict(zip(labels, self.color_values_))

    # Transform labels into a map of class to label
    labels = dict(zip(self.classes_, labels))

    # Expand the points into vectors of x and y for scatter plotting,
    # assigning them to their label if the label has been passed in.
    # Additionally, filter classes not specified directly by the user.
    series = defaultdict(lambda: {'x': [], 'y': []})

    if target is not None:
        for t, point in zip(target, points):
            label = labels[t]
            series[label]['x'].append(point[0])
            series[label]['y'].append(point[1])
    else:
        # ``colors`` is keyed by display label, not by class, so translate
        # the class first; using the raw class raised KeyError whenever
        # custom labels were supplied without a target.
        label = labels[self.classes_[0]]
        for x, y in points:
            series[label]['x'].append(x)
            series[label]['y'].append(y)

    # Plot the points (loop name chosen so ``points`` is not shadowed)
    for label, coords in series.items():
        self.ax.scatter(
            coords['x'], coords['y'], c=colors[label],
            alpha=self.alpha, label=label
        )
def draw(self, X, y, **kwargs):
    """Called from the fit method, this method creates a scatter plot that
    draws each instance as a class or target colored point, whose location
    is determined by the feature data set.

    Parameters
    ----------
    X : iterable of rows
        Each row supplies the coordinates of one instance.

    y : indexable
        ``y[i]`` is used as an index into ``self.classes_`` for row ``i``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.scatter`` call.
    """
    # Fix both axes to the [-1, 1] range
    for set_limit in (self.ax.set_xlim, self.ax.set_ylim):
        set_limit([-1, 1])

    # Map every class to its color
    palette = resolve_colors(
        n_colors=len(self.classes_), colormap=self.colormap, colors=self.color
    )
    colors = dict(zip(self.classes_, palette))

    # Per-class pair of lists: [x values, y values]
    to_plot = {kls: [[], []] for kls in self.classes_}

    # Add each row of the data set to to_plot for plotting
    # TODO: make this an independent function for override
    for position, row in enumerate(X):
        doubled = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        kls = self.classes_[y[position]]
        xs, ys = to_plot[kls]
        xs.append(doubled[0])
        ys.append(doubled[1])

    # Add the scatter plots from the to_plot function
    # TODO: store these plots to add more instances to later
    # TODO: make this a separate function
    for kls in self.classes_:
        xs, ys = to_plot[kls]
        self.ax.scatter(
            xs,
            ys,
            marker=next(self.markers),
            color=colors[kls],
            label=str(kls),
            alpha=self.alpha,
            **kwargs
        )

    self.ax.axis('equal')
def draw(self, **kwargs):
    """
    Called from the fit method, this method creates the canvas and
    draws the part-of-speech tag mapping as a bar chart.

    When ``self.frequency`` is set, the tags (``self._pos_tags``) and their
    counts are reordered by descending total count before drawing.

    Parameters
    ----------
    kwargs: dict
        generic keyword arguments.

    Returns
    -------
    ax : matplotlib axes
        Axes on which the PosTagVisualizer was drawn.
    """
    # Nested dict -> 2D array: one row per outer key, one column per tag
    rows = [list(inner.values()) for inner in self.pos_tag_counts_.values()]
    counts = np.array(rows)

    # Column-wise totals across all rows
    totals = counts.sum(axis=0)

    if self.frequency:
        # Reorder tags and count columns from most to least frequent
        order = totals.argsort()[::-1]
        self._pos_tags = np.array(self._pos_tags)[order]
        counts = counts[:, order]

    if self.stack:
        bar_stack(
            counts,
            ax=self.ax,
            labels=list(self.labels_),
            ticks=self._pos_tags,
            colors=self.colors,
            colormap=self.colormap,
        )
        return self.ax

    # Unstacked: draw only the first row, one color per tag
    n_tags = len(self._pos_tags)
    xidx = np.arange(n_tags)
    bar_colors = resolve_colors(
        n_colors=n_tags, colormap=self.colormap, colors=self.colors
    )
    self.ax.bar(xidx, counts[0], color=bar_colors)

    return self.ax
def draw(self):
    """
    Draws the precision-recall curves computed in score on the axes.

    The resolved class colors are stored on ``self._colors``. When
    ``self.iso_f1_curves`` is set, one faint curve per value in
    ``self.iso_f1_values`` is drawn and annotated first. The actual curves
    are then delegated to ``_draw_binary`` or ``_draw_multiclass``
    depending on ``self.target_type_``.

    Returns
    -------
    The return value of ``_draw_binary`` or ``_draw_multiclass``.
    """
    # set the colors
    self._colors = resolve_colors(
        n_colors=len(self.classes_), colormap=self.cmap, colors=self.colors
    )

    if self.iso_f1_curves:
        for f1 in self.iso_f1_values:
            recall = np.linspace(0.01, 1)
            precision = f1 * recall / (2 * recall - f1)

            # Only the non-negative part of the curve is drawn
            visible = precision >= 0
            self.ax.plot(
                recall[visible], precision[visible], color="#333333", alpha=0.2
            )

            # Label placed just above the curve's value at sample index 45
            anchor = (0.9, precision[45] + 0.02)
            self.ax.annotate("$f_1={:0.1f}$".format(f1), xy=anchor)

    if self.target_type_ != BINARY:
        return self._draw_multiclass()
    return self._draw_binary()
def draw(self, X, y=None, **kwargs):
    """
    Called from the fit method, this method creates a decision boundary
    plot, and if self.scatter is True, it will scatter plot that draws
    each instance as a class or target colored point, whose location
    is determined by the feature data set.

    Parameters
    ----------
    X : the data to draw; first reduced with ``_select_feature_columns``.

    y : indexable, default: None
        ``y[i]`` is looked up for every row of ``X``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.scatter`` call.
    """
    # ensure that if someone is passing in another X such as X_test, that
    # features will be properly handled
    X = self._select_feature_columns(X)

    # Assign one resolved color to each class, in class order
    palette = iter(
        resolve_colors(colors=self.colors, n_colors=len(self.classes_))
    )
    colors = OrderedDict([(kls, next(palette)) for kls in self.classes_])

    # Shaded background showing the decision regions
    self.ax.pcolormesh(
        self.xx,
        self.yy,
        self.Z_shape,
        alpha=self.pcolormesh_alpha,
        cmap=ListedColormap(colors.values()),
    )

    # Per-class-index pair of lists: [x values, y values]
    to_plot = OrderedDict(
        [(index, [[], []]) for index in self.classes_.values()]
    )

    # Add each row of the data set to to_plot for plotting
    for position, row in enumerate(X):
        doubled = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)

        # look up the y class name if given in init
        target = y[position]
        if self.class_labels is not None:
            target = self.class_labels[target]

        xs, ys = to_plot[self.classes_[target]]
        xs.append(doubled[0])
        ys.append(doubled[1])

    # Add the scatter plots from the to_plot function
    # TODO: store these plots to add more instances to later
    # TODO: make this a separate function
    if not self.show_scatter:
        # No points drawn: build the legend from colored patches instead
        handles = [Patch(color=colors[kls], label=kls) for kls in self.classes_]
        self.ax.legend(handles=handles)
        return

    for kls, index in self.classes_.items():
        xs, ys = to_plot[index]
        self.ax.scatter(
            xs,
            ys,
            marker=next(self.markers),
            color=colors[kls],
            alpha=self.scatter_alpha,
            s=30,
            edgecolors='black',
            label=str(kls),
            **kwargs
        )
def draw(self, X, y, **kwargs):
    """
    Called from the fit method, this method creates the radviz canvas and
    draws each instance as a class or target colored point, whose location
    is determined by the feature data set.

    Parameters
    ----------
    X : DataFrame or 2D array
        Data frames are converted with ``.values``; rows with missing
        values are filtered out before plotting.

    y : indexable
        ``y[i]`` is used as an index into ``self.classes_`` for row ``i``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.scatter`` call.
    """
    # Convert from dataframe
    if is_dataframe(X):
        X = X.values

    # Clean out nans and warn that the user they aren't plotted
    nan_warnings.warn_if_nans_exist(X)
    X, y = nan_warnings.filter_missing(X, y)

    # Only the number of columns (feature axes) is needed below
    _, ncols = X.shape

    # Set the axes limits
    self.ax.set_xlim([-1, 1])
    self.ax.set_ylim([-1, 1])

    # Create the colors
    # TODO: Allow both colormap, listed colors, and palette definition
    # TODO: Make this an independent function or property for override!
    palette = resolve_colors(
        n_colors=len(self.classes_), colormap=self.colormap, colors=self.color
    )
    self._colors = dict(zip(self.classes_, palette))

    # Per-class pair of lists: [x values, y values]
    to_plot = {kls: [[], []] for kls in self.classes_}

    # Compute the arcs around the circumference for each feature axis
    # TODO: make this an independent function for override
    angles = [2.0 * np.pi * (i / float(ncols)) for i in range(ncols)]
    s = np.array([(np.cos(t), np.sin(t)) for t in angles])

    # Compute the locations of the scatter plot for each class
    # Normalize the data first to plot along the 0, 1 axis
    for position, row in enumerate(self.normalize(X)):
        weights = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        xy = (s * weights).sum(axis=0) / row.sum()
        xs, ys = to_plot[self.classes_[y[position]]]
        xs.append(xy[0])
        ys.append(xy[1])

    # Add the scatter plots from the to_plot function
    # TODO: store these plots to add more instances to later
    # TODO: make this a separate function
    for kls in self.classes_:
        xs, ys = to_plot[kls]
        self.ax.scatter(
            xs,
            ys,
            color=self._colors[kls],
            label=str(kls),
            alpha=self.alpha,
            **kwargs
        )

    # Add the circular axis path
    # TODO: Make this a seperate function (along with labeling)
    outline = patches.Circle(
        (0.0, 0.0),
        radius=1.0,
        facecolor='none',
        edgecolor='grey',
        linewidth=.5,
    )
    self.ax.add_patch(outline)

    # Add the feature names
    for xy, name in zip(s, self.features_):
        # Add the patch indicating the location of the axis
        self.ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='#777777'))

        # Add the feature names offset around the axis marker: the text is
        # pushed away from the origin in whichever quadrant the marker is
        if xy[0] < 0.0:
            text_x, ha = xy[0] - 0.025, 'right'
        else:
            text_x, ha = xy[0] + 0.025, 'left'

        if xy[1] < 0.0:
            text_y, va = xy[1] - 0.025, 'top'
        else:
            text_y, va = xy[1] + 0.025, 'bottom'

        self.ax.text(text_x, text_y, name, ha=ha, va=va, size='small')

    self.ax.axis('equal')
def draw(self, X, y, **kwargs):
    """
    Called from the fit method, this method creates the radviz canvas and
    draws each instance as a class or target colored point, whose location
    is determined by the feature data set.

    Parameters
    ----------
    X : DataFrame or 2D array
        Data frames are converted with ``.values``; rows with missing
        values are filtered out before plotting.

    y : indexable
        ``y[i]`` is used as an index into ``self.classes_`` for row ``i``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.scatter`` call.
    """
    # Convert from dataframe
    if is_dataframe(X):
        X = X.values

    # Clean out nans and warn that the user they aren't plotted
    nan_warnings.warn_if_nans_exist(X)
    X, y = nan_warnings.filter_missing(X, y)

    # Get the shape of the data; only the column count is used
    n_features = X.shape[1] if len(X.shape) == 2 else None
    if n_features is None:
        # Preserve the unpacking error raised for non 2D input
        _, n_features = X.shape

    # Set the axes limits
    limits = [-1, 1]
    self.ax.set_xlim(list(limits))
    self.ax.set_ylim(list(limits))

    # Create the colors
    # TODO: Allow both colormap, listed colors, and palette definition
    # TODO: Make this an independent function or property for override!
    self._colors = dict(zip(
        self.classes_,
        resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color,
        ),
    ))

    # Create a data structure to hold scatter plot representations
    to_plot = {}
    for kls in self.classes_:
        to_plot[kls] = [[], []]

    # Compute the arcs around the circumference for each feature axis
    # TODO: make this an independent function for override
    anchors = []
    for i in range(n_features):
        theta = 2.0 * np.pi * (i / float(n_features))
        anchors.append((np.cos(theta), np.sin(theta)))
    s = np.array(anchors)

    # Compute the locations of the scatter plot for each class
    # Normalize the data first to plot along the 0, 1 axis
    for i, row in enumerate(self.normalize(X)):
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        xy = (s * row_).sum(axis=0) / row.sum()
        bucket = to_plot[self.classes_[y[i]]]
        bucket[0].append(xy[0])
        bucket[1].append(xy[1])

    # Add the scatter plots from the to_plot function
    # TODO: store these plots to add more instances to later
    # TODO: make this a separate function
    for kls in self.classes_:
        self.ax.scatter(
            to_plot[kls][0],
            to_plot[kls][1],
            color=self._colors[kls],
            label=str(kls),
            alpha=self.alpha,
            **kwargs
        )

    # Add the circular axis path
    # TODO: Make this a seperate function (along with labeling)
    self.ax.add_patch(patches.Circle(
        (0.0, 0.0), radius=1.0,
        facecolor='none', edgecolor='grey', linewidth=.5,
    ))

    # Add the feature names
    offset = 0.025
    for xy, name in zip(s, self.features_):
        # Add the patch indicating the location of the axis
        marker = patches.Circle(xy, radius=0.025, facecolor='#777777')
        self.ax.add_patch(marker)

        # Add the feature names offset around the axis marker
        left = xy[0] < 0.0
        below = xy[1] < 0.0
        self.ax.text(
            xy[0] - offset if left else xy[0] + offset,
            xy[1] - offset if below else xy[1] + offset,
            name,
            ha='right' if left else 'left',
            va='top' if below else 'bottom',
            size='small',
        )

    self.ax.axis('equal')
def draw(self, X, y=None, **kwargs):
    """
    Called from the fit method, this method creates a decision boundary
    plot, and if self.scatter is True, it will scatter plot that draws
    each instance as a class or target colored point, whose location
    is determined by the feature data set.

    Parameters
    ----------
    X : the data to draw; first reduced with ``_select_feature_columns``.

    y : indexable, default: None
        ``y[i]`` is looked up for every row of ``X``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.scatter`` call.
    """
    # ensure that if someone is passing in another X such as X_test, that
    # features will be properly handled
    X = self._select_feature_columns(X)

    # Keyword names follow the ``resolve_colors`` signature used by the
    # other visualizers (``n_colors``/``colors``); the previous
    # ``num_colors``/``color`` spelling does not match it.
    color_cycle = iter(
        resolve_colors(colors=self.colors, n_colors=len(self.classes_)))
    colors = OrderedDict([(c, next(color_cycle))
                          for c in self.classes_.keys()])

    # Shaded background showing the decision regions
    self.ax.pcolormesh(self.xx,
                       self.yy,
                       self.Z_shape,
                       alpha=self.pcolormesh_alpha,
                       cmap=ListedColormap(colors.values()))

    # Create a data structure to hold the scatter plot representations
    to_plot = OrderedDict()
    for index in self.classes_.values():
        to_plot[index] = [[], []]

    # Add each row of the data set to to_plot for plotting
    for i, row in enumerate(X):
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        x_, y_ = row_[0], row_[1]
        # look up the y class name if given in init
        if self.class_labels is not None:
            target = self.class_labels[y[i]]
        else:
            target = y[i]
        index = self.classes_[target]
        to_plot[index][0].append(x_)
        to_plot[index][1].append(y_)

    # Add the scatter plots from the to_plot function
    # TODO: store these plots to add more instances to later
    # TODO: make this a separate function
    if self.show_scatter:
        for kls, index in self.classes_.items():
            self.ax.scatter(to_plot[index][0],
                            to_plot[index][1],
                            marker=next(self.markers),
                            color=colors[kls],
                            alpha=self.scatter_alpha,
                            s=30,
                            edgecolors='black',
                            label=str(kls),
                            **kwargs)
    else:
        # No points drawn: build the legend from colored patches instead
        labels = [
            Patch(color=colors[kls], label=kls)
            for kls in self.classes_.keys()
        ]
        self.ax.legend(handles=labels)
def draw(self, X, y, **kwargs):
    """
    Called from the fit method, this method creates the parallel
    coordinates canvas and draws each instance and vertical lines on it.

    Parameters
    ----------
    X : DataFrame or 2D array
        Data frames are converted with ``.values``. Only the first
        ``self.n_samples`` rows are drawn.

    y : indexable
        ``y[i]`` is used as an index into ``self.classes_`` for row ``i``.

    kwargs : dict
        Extra keyword arguments forwarded to every ``ax.plot`` call.
    """
    # Convert from dataframe. ``DataFrame.as_matrix`` was removed in
    # pandas 1.0; ``.values`` is the equivalent available in all versions.
    if is_dataframe(X):
        X = X.values

    # Choose a subset of samples
    # TODO: allow selection of a random subset of samples instead of head
    if isinstance(self.sample, int):
        # An integer is an absolute row count, capped at the data size
        self.n_samples = min([self.sample, len(X)])
    elif isinstance(self.sample, float):
        # A float is a fraction of the available rows
        self.n_samples = int(len(X) * self.sample)
    X = X[:self.n_samples, :]

    # Normalize
    if self.normalize is not None:
        X = self.normalizers[self.normalize].fit_transform(X)

    # Get the shape of the data
    nrows, ncols = X.shape

    # Create the xticks for each column
    # TODO: Allow the user to specify this feature
    x = list(range(ncols))

    # Create the colors
    # TODO: Allow both colormap, listed colors, and palette definition
    # TODO: Make this an independent function or property for override!
    color_values = resolve_colors(
        n_colors=len(self.classes_), colormap=self.colormap, colors=self.color
    )
    colors = dict(zip(self.classes_, color_values))

    # Track which labels are already in the legend
    used_legends = set()

    # TODO: Make this function compatible with DataFrames!
    # TODO: Make an independent function to allow addition of instances!
    for idx, row in enumerate(X):
        # TODO: How to map classmap to labels?
        label = y[idx]  # Get the label for the row
        label = self.classes_[label]

        if label not in used_legends:
            # Only the first line of each class carries a legend label
            used_legends.add(label)
            self.ax.plot(x, row, color=colors[label], alpha=0.25,
                         label=label, **kwargs)
        else:
            self.ax.plot(x, row, color=colors[label], alpha=0.25, **kwargs)

    # Add the vertical lines
    # TODO: Make an independent function for override!
    if self.show_vlines:
        for idx in x:
            self.ax.axvline(idx, **self.vlines_kwds)

    # Set the limits
    self.ax.set_xticks(x)
    self.ax.set_xticklabels(self.features_)
    self.ax.set_xlim(x[0], x[-1])
def fit(self, X, y=None, **kwargs):
    """
    The fit method is the primary drawing input for the
    visualization since it has both the X and y data required for the
    viz and the transform method does not.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    kwargs : dict
        Pass generic arguments to the drawing method

    Returns
    -------
    self : instance
        Returns the instance of the transformer/visualizer
    """
    # Convert from pandas data types, taking the column names as feature
    # names first when none were specified
    from_frame = is_dataframe(X)
    if from_frame and self.features_ is None:
        self.features_ = np.array(X.columns)
    if from_frame:
        X = X.values

    if is_series(y):
        y = y.values

    # Fall back to integer labels for the feature columns
    if self.features_ is None:
        n_columns = X.shape[1]
        self.features_ = np.arange(n_columns)

    # Ensure that all classes are represented in the color mapping (before sample)
    # NOTE: np.unique also specifies the ordering of the classes
    if self.classes_ is None:
        self.classes_ = list(map(str, np.unique(y)))

    # Create the color mapping for each class
    # TODO: Allow both colormap, listed colors, and palette definition
    # TODO: Make this an independent function or property for override!
    palette = resolve_colors(
        n_colors=len(self.classes_), colormap=self.colormap, colors=self.color
    )
    self._colors = dict(zip(self.classes_, palette))

    # Ticks for each feature specified
    self._increments = np.arange(len(self.features_))

    # Subsample instances
    X, y = self._subsample(X, y)

    # Normalize instances
    if self.normalize is not None:
        normalizer = self.NORMALIZERS[self.normalize]
        X = normalizer.fit_transform(X)

    # the super method calls draw and returns self
    return super(ParallelCoordinates, self).fit(X, y, **kwargs)