def draw_interaction_lorenz_curve(axis: axes.SubplotBase, data: pd.DataFrame,
                                  unique_rev_strs: tp.List[str],
                                  consider_in_interactions: bool,
                                  consider_out_interactions: bool,
                                  line_width: float) -> None:
    """
    Draws a lorenz curve of the selected interaction column onto the axis.

    Note: ``data`` is sorted in place by the selected interaction column and
    ``time_id``.

    Args:
        axis: matplot axis to draw on
        data: plotting data; needs a ``time_id`` column and the selected
              ``*HEAD_Interactions`` column
        unique_rev_strs: values plotted along the x-axis
        consider_in_interactions: True, IN interactions should be included
        consider_out_interactions: True, OUT interactions should be included
        line_width: line width of the plotted curve

    Raises:
        AssertionError: if neither IN nor OUT interactions are selected
    """
    if not (consider_in_interactions or consider_out_interactions):
        raise AssertionError(
            "At least one of the in/out interaction needs to be selected")

    # At this point at least one flag is set, so a missing flag identifies
    # the single direction that was requested.
    if not consider_out_interactions:
        column = 'IN_HEAD_Interactions'
    elif not consider_in_interactions:
        column = 'OUT_HEAD_Interactions'
    else:
        column = 'HEAD_Interactions'

    data.sort_values(by=[column, 'time_id'], inplace=True)
    curve_values = lorenz_curve(data[column])
    axis.plot(unique_rev_strs,
              curve_values,
              color='#cc0099',
              linewidth=line_width)
def _draw_edges_plt(ax: SubplotBase, tsp: TSP,
                    edges: Iterable[Tuple[int, int]]):
    """Plot every edge as a blue line between its two end cities.

    Args:
        ax (SubplotBase): Matplotlib axes to plot on.
        tsp (TSP): the problem; ``tsp.cities`` is indexed by the edge
            endpoints and ``tsp.h`` is used to flip the y coordinate.
        edges (Iterable[Tuple[int, int]]): pairs of city indices.
    """
    for start_idx, end_idx in edges:
        # Work on copies so the flip below never touches tsp.cities.
        endpoints = [np.copy(tsp.cities[idx]) for idx in (start_idx, end_idx)]
        for point in endpoints:
            point[1] = tsp.h - point[1]
        ax.plot(*zip(*endpoints), 'b-')
def _draw_tour_plt(ax: SubplotBase, tsp: TSP,
                   tour: Iterable[Union[int, NDArray]]):
    """Plot a tour as one connected blue line.

    Args:
        ax (SubplotBase): Matplotlib axes to plot on.
        tsp (TSP): the problem; ``tsp.h`` is used to flip the y coordinate.
        tour (Iterable[Union[int, NDArray]]): either vertex indices, which
            are expanded through ``tsp.tour_segments``, or entries that are
            plotted as given. Must not be empty, because the first element
            is inspected to tell the two forms apart.
    """
    stops = list(tour)
    if isinstance(stops[0], Number):
        # Index form: let the problem translate indices into coordinates.
        # NOTE(review): presumably tour_segments yields (x, y) entries - confirm.
        entries = tsp.tour_segments(stops)
    else:
        entries = stops
    # Transpose so that row 0 holds the first and row 1 the second component.
    coords = np.array(list(zip(*entries)))
    coords[1] = tsp.h - coords[1]
    ax.plot(*coords, 'b-')
def draw_perfect_lorenz_curve(axis: axes.SubplotBase,
                              unique_rev_strs: tp.List[str],
                              line_width: float) -> None:
    """
    Draws a perfect lorenz curve onto the given axis, i.e., a straight line
    from the point of origin to the right upper corner.

    Args:
        axis: axis to draw to
        unique_rev_strs: values plotted along the x-axis; one evenly spaced
                         y value between 0.0 and 1.0 is produced per entry
        line_width: line width of the plotted line
    """
    num_points = len(unique_rev_strs)
    diagonal = np.linspace(0.0, 1.0, num_points)
    axis.plot(unique_rev_strs,
              diagonal,
              color='black',
              linestyle='--',
              linewidth=line_width)
def draw_gini_blame_over_time(axis: axes.SubplotBase,
                              blame_data: pd.DataFrame,
                              unique_rev_strs: tp.List[str],
                              consider_in_interactions: bool,
                              consider_out_interactions: bool,
                              line_width: float) -> None:
    """
    Draws the gini coefficients of the blame interactions over time.

    For every ``time_id`` in ``blame_data`` the gini coefficient is computed
    over all rows whose ``time_id`` is less than or equal to it.

    Args:
        axis: axis to draw to
        blame_data: blame data of the base plot
        unique_rev_strs: values plotted along the x-axis
        consider_in_interactions: True, IN interactions should be included
        consider_out_interactions: True, OUT interactions should be included
        line_width: line width of the plot lines

    Raises:
        AssertionError: if neither IN nor OUT interactions are selected
    """
    # (IN selected, OUT selected) -> (data column, line style, legend label)
    plot_variants = {
        (True, True): ('HEAD_Interactions', '-', "Interactions"),
        (True, False): ('IN_HEAD_Interactions', '--', "IN Interactions"),
        (False, True): ('OUT_HEAD_Interactions', ':', "OUT Interactions"),
    }
    variant = plot_variants.get(
        (bool(consider_in_interactions), bool(consider_out_interactions)))
    if variant is None:
        raise AssertionError(
            "At least one of the in/out interaction needs to be selected")
    column, linestyle, label = variant

    gini_coefficients = [
        gini_coefficient(
            blame_data[blame_data.time_id <= time_id][column].sort_values(
                ascending=True)) for time_id in blame_data.time_id
    ]

    axis.plot(unique_rev_strs,
              gini_coefficients,
              linestyle=linestyle,
              linewidth=line_width,
              label=label,
              color='#cc0099')
def draw(self, ax: SubplotBase, title: str = '') -> None:
    """Draw ``self.graph`` onto ``ax`` using a spring layout.

    When ``self.options['draw_edge_labels']`` is truthy, the edge weights
    rounded to two decimals are stored in ``self.options['edge_labels']``
    and drawn as edge labels before the graph itself is drawn.

    Arguments:
        ax {SubplotBase} -- axes to draw on

    Keyword Arguments:
        title {str} -- title set on the axes (default: {''})
    """
    ax.set_title(title, fontsize=16)
    layout = nx.spring_layout(self.graph)

    if self.options['draw_edge_labels']:
        weights = nx.get_edge_attributes(self.graph, 'weight')
        self.options['edge_labels'] = {
            edge: round(weight, 2) for edge, weight in weights.items()
        }
        nx.draw_networkx_edge_labels(self.graph, layout, ax=ax,
                                     **self.options)

    # The graph itself is drawn in either case.
    nx.draw(self.graph, layout, ax=ax, **self.options)
def annotate_correlation(
        x_values: tp.List[int],
        y_values: tp.List[int],
        ax: axes.SubplotBase = None,
        # pylint: disable=unused-argument
        **kwargs: tp.Any) -> None:
    """Plot the correlation coefficients in the top right hand corner of a
    plot.

    The Pearson coefficient is annotated first and the Spearman coefficient
    below it. Falls back to the current axes when no ``ax`` is given.
    """
    if not ax:
        ax = plt.gca()

    # (subscript of rho, correlation function, vertical position in axes
    # coordinates)
    annotations = (
        ('p', pearsonr, .9),
        ('s', spearmanr, .77),
    )
    for subscript, correlate, y_pos in annotations:
        rho, _ = correlate(x_values, y_values)
        ax.annotate(f'$\\mathit{{\\rho_{subscript}}}$ = {rho:.2f}',
                    xy=(.6, y_pos),
                    xycoords=ax.transAxes,
                    fontsize="small")
def visualize_mst_plt(tsp: TSP,
                      mst: Iterable[Tuple[float, Tuple[int, int]]],
                      ax: SubplotBase = None):
    """Generate visualization of an MST using MatPlotLib backend.

    An empty MST is allowed; in that case only the cities are drawn.

    Args:
        tsp (TSP): the problem
        mst (Iterable[Tuple[float, Tuple[int, int]]]): edges in MST; the
            second item of every entry is the pair of city indices
        ax (SubplotBase): Matplotlib axes to plot on. Defaults to None, in
            which case ``plt.subplot(111)`` is used.
    """
    if ax is None:
        ax = plt.subplot(111)

    ax.set_xlim((0, tsp.w))
    ax.set_ylim((0, tsp.h))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect('equal', 'box')

    # Keep only the index pair of every entry. Unlike `list(zip(*mst))[1]`
    # this does not raise IndexError when the MST has no edges.
    mst_edges = tuple(entry[1] for entry in mst)
    _draw_edges_plt(ax, tsp, mst_edges)
    _draw_cities_plt(ax, tsp)
def visualize_tsp_plt(tsp: TSP,
                      tour: Iterable[Union[int, NDArray]],
                      ax: SubplotBase = None):
    """Generate visualization of a TSP using MatPlotLib backend.

    The axes are limited to the problem's width and height, ticks are
    removed and the aspect ratio is fixed. The tour is drawn only when it
    is non-empty; the cities are always drawn.

    Args:
        tsp (TSP): the problem
        tour (Iterable[Union[int, NDArray]]): tour either as indices of
            vertices or as segments
        ax (SubplotBase): Matplotlib axes to plot on. Defaults to None, in
            which case ``plt.subplot(111)`` is used.
    """
    if ax is None:
        ax = plt.subplot(111)

    # Frame the drawing area.
    ax.set_xlim((0, tsp.w))
    ax.set_ylim((0, tsp.h))
    # Remove the tick marks on both axes.
    for clear_ticks in (ax.set_xticks, ax.set_yticks):
        clear_ticks([])
    ax.set_aspect('equal', 'box')

    tour_is_empty = len(tour) == 0
    if not tour_is_empty:
        _draw_tour_plt(ax, tsp, tour)
    _draw_cities_plt(ax, tsp)
def visualize_clusters_plt(tsp: TSP,
                           mst: Iterable[Tuple[float, Tuple[int, int]]],
                           clusters: Iterable[DSNode],
                           ax: SubplotBase = None):
    """Generate visualization of clusters using MatPlotLib backend.

    An empty MST is allowed; in that case only the cities are drawn.

    Args:
        tsp (TSP): the problem
        mst (Iterable[Tuple[float, Tuple[int, int]]]): edges in MST; the
            second item of every entry is the pair of city indices
        clusters (Iterable[DSNode]): forest of clusters
        ax (SubplotBase): Matplotlib axes to plot on. Defaults to None, in
            which case ``plt.subplot(111)`` is used.
    """
    if ax is None:
        ax = plt.subplot(111)

    ax.set_xlim((0, tsp.w))
    ax.set_ylim((0, tsp.h))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect('equal', 'box')

    # Keep only the index pair of every entry. Unlike `list(zip(*mst))[1]`
    # this does not raise IndexError when the MST has no edges.
    mst_edges = tuple(entry[1] for entry in mst)
    if mst_edges:
        # Each group yielded by _isolate_edges is drawn separately.
        for cluster_edges in _isolate_edges(mst_edges, clusters):
            _draw_edges_plt(ax, tsp, cluster_edges)
    _draw_cities_plt(ax, tsp)
def _hist(
        x_values: tp.List[int],
        ax: axes.SubplotBase = None,
        # pylint: disable=unused-argument
        **kwargs: tp.Any) -> None:
    """Plot a histogram of ``x_values`` onto ``ax``.

    Falls back to the current axes when no ``ax`` is given. A hidden twin
    y-axis limited to [0, 1] is added, and both axes are padded and aligned
    at zero.
    """
    ax = ax or plt.gca()
    # Draw on the resolved axes; `plt.hist` would always target the current
    # axes and silently ignore an explicitly passed `ax`.
    ax.hist(x_values)

    # hack to adjust the histogram axis to the off-diag plots' axes.
    ax2 = ax.twinx()
    ax2.set_ylim(0, 1)
    ax2.yaxis.set_visible(False)
    pad_axes(ax, pad_y=0.01)
    pad_axes(ax2, pad_y=0.01)
    align_yaxis(ax, 0, ax2, 0)
def logit_scatterplot(
        x_values: tp.List[int],
        y_values: tp.List[int],
        ax: axes.SubplotBase = None,
        # pylint: disable=unused-argument
        **kwargs: tp.Any) -> None:
    """Plot a scatterplot with clusters as hue and plot a logit that
    estimates the clusters.

    Falls back to the current axes when no ``ax`` is given. The logit is
    drawn on a hidden twin y-axis limited to [0, 1].
    """
    if not ax:
        ax = plt.gca()

    frame = pd.DataFrame({'x_values': x_values, 'y_values': y_values})
    frame.sort_values(by='y_values', inplace=True)
    # dichotomize y_values to be able to use logistic regression
    frame['target'] = _cluster_data_by_kmeans(frame['y_values'])

    logit_ax = ax.twinx()
    logit_ax.set_ylim(0, 1)
    logit_ax.yaxis.set_visible(False)

    # plot logit
    logit_style = {
        'scatter': False,
        'ci': None,
        'logistic': True,
        'color': 'black',
        'line_kws': {'alpha': 0.25},
    }
    sns.regplot(x='x_values',
                y='target',
                data=frame,
                ax=logit_ax,
                **logit_style)

    # scatterplot with the two clusters as hue
    # hue is passed as a plain list because of
    # https://github.com/mwaskom/seaborn/issues/2194
    cluster_hue = frame['target'].tolist()
    sns.scatterplot(x='x_values',
                    y='y_values',
                    hue=cluster_hue,
                    data=frame,
                    ax=ax)

    for padded_axis in (ax, logit_ax):
        pad_axes(padded_axis, 0.01, 0.01)
    align_yaxis(ax, 0, logit_ax, 0)
def plot_roc_curve(self,
                   y_prob: list,
                   y_true: list,
                   ax1: SubplotBase = None):
    """Plot the ROC curve together with the thresholds on a twin y-axis.

    Args:
        y_prob: predicted scores, passed to ``roc_curve``
        y_true: true labels; ``self.config.BUG_STRING`` is the positive label
        ax1: axes to draw on. Defaults to None, in which case a new figure
            is created.
    """
    # use sk-learn to get false positive rate and true positive rate for
    # different thresholds
    fpr, tpr, thresholds = roc_curve(y_true,
                                     y_prob,
                                     pos_label=self.config.BUG_STRING)
    auc_score = auc(fpr, tpr)

    # set up plot
    if ax1 is None:
        _, ax1 = plt.subplots()
    ax1.title.set_text(
        f'Receiver Operating Characteristic Curve \n with Area Under Curve: {auc_score:.2f}'
    )
    ax2 = ax1.twinx()

    # draw lines and legends
    ax1.plot(fpr, tpr, color='blue', marker=',', label='ROC')
    ax1.legend(loc=1)
    # last of fpr is 1 (ignored) and first of thresholds is not an actual
    # threshold (ignored)
    ax2.plot(fpr[:-1],
             thresholds[1:],
             color='green',
             marker=',',
             label='Threshold-FP')
    ax2.legend(loc=2)
    # Chance diagonal. Drawn on ax1 explicitly; `plt.plot` would target
    # whatever axes happen to be current, which need not be the given axes.
    ax1.plot([0, 1], [0, 1], 'k--')

    # set labels
    ax1.set_xlabel('False positive rate')
    ax1.set_ylabel('True positive rate', color='b')
    ax2.set_ylabel('Threshold', color='g')

    # match the values on y axes
    ylim = [0, 1.2]
    yticks = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    ax1.set_ylim(ylim)
    ax1.set_yticks(yticks)
    ax2.set_ylim(ylim)
    ax2.set_yticks(yticks)
def plot_precision_recall_curve(self,
                                y_prob: list,
                                y_true: list,
                                ax1: SubplotBase = None):
    """Plot the precision-recall curve with the thresholds on a twin y-axis.

    A horizontal "No Skill" line is drawn at the fraction of positive
    labels in ``y_true``.

    Args:
        y_prob: predicted scores, passed to ``precision_recall_curve``
        y_true: true labels; ``self.config.BUG_STRING`` is the positive label
        ax1: axes to draw on. Defaults to None, in which case a new figure
            is created.
    """
    positive_label = self.config.BUG_STRING

    # use sk-learn to get precision and recall for different thresholds
    lr_precision, lr_recall, thresholds = precision_recall_curve(
        y_true, y_prob, pos_label=positive_label)
    auc_score = auc(lr_recall, lr_precision)

    # set up plot
    if ax1 is None:
        _, ax1 = plt.subplots()
    ax1.title.set_text(
        f'Precision-Recall Curve \n with Area Under Curve: {auc_score:.2f}'
    )
    ax2 = ax1.twinx()

    # draw lines and add legends
    ax1.plot(lr_recall,
             lr_precision,
             color='blue',
             marker=',',
             label='Precision-Recall')
    ax1.legend(loc=1)
    ax2.plot(lr_recall[:-1],
             thresholds,
             color='green',
             marker=',',
             label='Threshold-Recall')
    ax2.legend(loc=2)
    # Count the same positive label the curve was computed with instead of a
    # hard-coded 'bug', so the baseline stays correct if the label changes.
    no_skill = y_true.count(positive_label) / len(y_true)
    ax1.plot([0, 1], [no_skill, no_skill],
             color='black',
             linestyle='--',
             label='No Skill')

    # set labels
    ax1.set_xlabel('Recall')
    ax1.set_ylabel('Precision', color='b')
    ax2.set_ylabel('Threshold', color='g')

    # match the values on y axes
    ylim = [0, 1.2]
    yticks = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    ax1.set_ylim(ylim)
    ax1.set_yticks(yticks)
    ax2.set_ylim(ylim)
    ax2.set_yticks(yticks)
def plot_surface_density_profile(
        model: astro_dynamo.model.DynamicalModel,
        ax: SubplotBase = None,
        target_values: torch.Tensor = None) -> SubplotBase:
    """Plots the azimuthally averaged surface density of a model.

    The model must contain a SurfaceDensity target to be plotted. If supplied
    plots into axis ax, otherwise creates a new figure.

    Args:
        model: model whose ``targets`` are searched for a SurfaceDensity
        ax: axes to plot into; a new figure is created when None
        target_values: optional values plotted as a second, 'Target' curve

    Returns:
        The axes that were plotted into.

    Raises:
        TypeError: if the model has no SurfaceDensity target.
    """
    if ax is None:
        # A 1x1 grid yields a single Axes object (not an array), which is
        # used directly as the plotting target.
        _, ax = plt.subplots(1, 1)

    # `next` signals an exhausted generator with StopIteration, so a default
    # is used to detect the "no such target" case explicitly.
    surface_density_obj = next(
        (target for target in model.targets
         if isinstance(target, astro_dynamo.targets.SurfaceDensity)), None)
    if surface_density_obj is None:
        raise TypeError("Couldn't find a SurfaceDensity target in the model.")

    ax.semilogy(surface_density_obj.rmid.cpu(),
                surface_density_obj(model).detach().cpu(),
                label='Model')
    if target_values is not None:
        ax.semilogy(surface_density_obj.rmid.cpu(),
                    target_values.detach().cpu(),
                    label='Target')
    ax.set_xlabel('r')
    ax.set_ylabel(r'$\Sigma$')
    ax.set_ylim(1, 1e4)
    ax.legend()
    return ax
def _draw_obstacles_plt(ax: SubplotBase, tsp: TSP_O):
    """Plot every obstacle as a black line between its two end points.

    The y coordinate is flipped (``tsp.h - y``) on copies of the end points,
    so ``tsp.obstacles`` is left untouched and repeated calls draw the same
    picture.

    Args:
        ax (SubplotBase): Matplotlib axes to plot on.
        tsp (TSP_O): the problem; ``tsp.obstacles`` is iterated as pairs of
            end points.
    """
    for first, second in tsp.obstacles:
        # Copy before flipping: mutating the stored end points would flip
        # them back on every second call.
        start = list(first)
        end = list(second)
        start[1] = tsp.h - start[1]
        end[1] = tsp.h - end[1]
        ax.plot(*zip(start, end), 'k-')
def __init__(self, fig, *args, **kwargs):
    """Initialise the subplot part first, then the WcsAxes part.

    Positional arguments go to ``SubplotBase.__init__``; keyword arguments
    go to ``WcsAxes.__init__``.
    """
    SubplotBase.__init__(self, fig, *args)
    # NOTE(review): figLeft/figBottom/figW/figH are presumably set by
    # SubplotBase.__init__ above - confirm.
    rect = [self.figLeft, self.figBottom, self.figW, self.figH]
    WcsAxes.__init__(self, fig, rect, **kwargs)
def draw_gini_churn_over_time(axis: axes.SubplotBase,
                              blame_data: pd.DataFrame,
                              unique_rev_strs: tp.List[str],
                              project_name: str, commit_map: CommitMap,
                              consider_insertions: bool,
                              consider_deletions: bool,
                              line_width: float) -> None:
    """
    Draws the gini of the churn distribution over time.

    For every ``time_id`` in ``blame_data`` the gini coefficient is computed
    over the churn of all rows whose ``time_id`` is less than or equal to it.

    Args:
        axis: axis to draw to
        blame_data: blame data of the base plot
        unique_rev_strs: values plotted along the x-axis
        project_name: name of the project
        commit_map: CommitMap for the given project(by project_name)
        consider_insertions: True, insertions should be included
        consider_deletions: True, deletions should be included
        line_width: line width of the plot lines

    Raises:
        AssertionError: if neither insertions nor deletions are selected
    """
    # Validate before doing any work, so the check also applies when
    # blame_data is empty and the churn table is not built needlessly.
    if not (consider_insertions or consider_deletions):
        raise AssertionError(
            "At least one of insertions/deletions needs to be selected")

    churn_data = build_repo_churn_table(project_name, commit_map)

    # clean data: remove all churn data where this plot has no data
    unique_revs = set(blame_data['revision'].unique())
    has_blame_data = churn_data['revision'].apply(
        lambda revision: revision.hash[:10] in unique_revs).astype(bool)
    churn_data = churn_data[has_blame_data]

    # reorder churn data to match blame_data
    churn_data = churn_data.set_index('time_id')
    churn_data = churn_data.reindex(index=blame_data['time_id'])
    churn_data = churn_data.reset_index()

    # The selection does not change per time step, so resolve it once.
    if consider_insertions and consider_deletions:
        churn = churn_data.insertions + churn_data.deletions
        linestyle = '-'
        label = 'Insertions + Deletions'
    elif consider_insertions:
        churn = churn_data.insertions
        linestyle = '--'
        label = 'Insertions'
    else:
        churn = churn_data.deletions
        linestyle = ':'
        label = 'Deletions'

    gini_churn = [
        gini_coefficient(
            churn[churn_data.time_id <= time_id].sort_values(ascending=True))
        for time_id in blame_data['time_id']
    ]

    axis.plot(unique_rev_strs,
              gini_churn,
              linestyle=linestyle,
              linewidth=line_width,
              label=label,
              color='orange')
def plot_roc(self, ax: SubplotBase, count_stimulus: Sequence[int],
             count_nostimulus: Sequence[int], show_x_label: bool,
             show_y_label: bool, plainroc: bool, subtitle: str) -> None:
    """Plot a ROC curve from the values of ``self.get_sdt_values``.

    With ``plainroc`` the "fa"/"h" values are plotted and both axes are
    limited to [0, 1] plus a small margin; otherwise the "z_fa"/"z_h" values
    are plotted with no explicit limits. Axis labels are only shown when the
    respective ``show_*_label`` flag is set.
    """
    sdt = self.get_sdt_values(count_stimulus, count_nostimulus)

    if plainroc:
        margin = 0.05
        x_data, y_data = sdt["fa"], sdt["h"]
        x_text, y_text = "FA", "H"
        ax.set_xlim(0 - margin, 1 + margin)
        ax.set_ylim(0 - margin, 1 + margin)
    else:
        x_data, y_data = sdt["z_fa"], sdt["z_h"]
        x_text, y_text = "Z(FA)", "Z(H)"

    ax.plot(x_data, y_data, marker="+", color="b", linestyle="-")

    # Suppressed labels are set to the empty string.
    if not show_x_label:
        x_text = ""
    if not show_y_label:
        y_text = ""
    ax.set_xlabel(x_text)
    ax.set_ylabel(y_text)
    ax.set_title(subtitle)
def _draw_cities_plt(ax: SubplotBase, tsp: TSP):
    """Plot all cities of the problem as red dots.

    Args:
        ax (SubplotBase): Matplotlib axes to plot on.
        tsp (TSP): the problem; ``tsp.cities`` is iterated as coordinate
            entries and ``tsp.h`` is used to flip the y coordinate.
    """
    # Transpose the city list so that row 0 holds the first and row 1 the
    # second component; building a new array leaves tsp.cities untouched.
    per_axis = list(zip(*tsp.cities))
    coords = np.array(per_axis)
    coords[1] = tsp.h - coords[1]
    ax.plot(*coords, 'ro')