def grouped_violinplot(data1, data2, normalize=False, group_names=None, filename=None):
    """
    Plot a grouped violinplot to compare two datasets

    The datasets can be normalized by the median and standard deviation of data1.

    Parameters
    ----------
    data1: DataFrame
        Data set, columns = variable names
    data2: DataFrame
        Data set, columns = variable names
    normalize : bool, optional
        Normalize both datasets by the median and standard deviation of data1
    group_names : list, optional
        Names used in the legend. When omitted (or None), ['data1', 'data2']
        is used.
    filename: string, optional
        Filename used to save the figure. When omitted the figure is shown
        with ``plt.show()`` instead.

    Raises
    ------
    AssertionError
        If any argument has an unexpected type.
    """
    # Build the default per call so that a single list object is never shared
    # between invocations (it is handed to a helper that may modify it).
    if group_names is None:
        group_names = ['data1', 'data2']

    assert isinstance(data1, pd.DataFrame)
    assert isinstance(data2, pd.DataFrame)
    assert isinstance(normalize, bool)
    assert isinstance(group_names, list)
    assert isinstance(filename, (type(None), str))

    data = _get_grouped_data(data1, data2, normalize, group_names)

    plt.figure()
    sns.violinplot(data=data, hue='set', y='value', x='columns',
                   order=data1.columns, split=True)

    # Strip the legend title and the axis labels that seaborn adds
    ax = plt.gca()
    ax.legend().set_title('')
    ax.set_xlabel('')
    ax.set_ylabel('')

    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()
def visualize_model_graph(self, type_of_graph='constraint', filename=None, pos=None):
    """
    This function draws a graph of the communities for a Pyomo model.

    The type_of_graph parameter is used to create either a variable-node graph, constraint-node graph, or
    bipartite graph of the Pyomo model. Then, the nodes are colored based on the communities they are in - which
    is based on the community map (self.community_map). A filename can be provided to save the figure, otherwise
    the figure is illustrated with matplotlib.

    Parameters
    ----------
    type_of_graph: str, optional
        a string that specifies the types of nodes drawn on the model graph, the default is 'constraint'.
        'constraint' draws a graph with constraint nodes,
        'variable' draws a graph with variable nodes,
        'bipartite' draws a bipartite graph (with both constraint and variable nodes)
    filename: str, optional
        a string that specifies a path for the model graph illustration to be saved
    pos: dict, optional
        a dictionary that maps node keys to their positions on the illustration

    Returns
    -------
    fig: matplotlib figure
        the figure for the model graph drawing
    pos: dict
        a dictionary that maps node keys to their positions on the illustration - can be used to create
        consistent layouts for graphs of a given model

    Raises
    ------
    AssertionError
        If type_of_graph is not one of the valid values or filename is neither None nor a string.
    """
    # Check that all arguments are of the correct type
    # (the offending value is wrapped in a 1-tuple so that tuple arguments are reported by the
    # assertion message instead of breaking the %-formatting)
    assert type_of_graph in ('bipartite', 'constraint', 'variable'), \
        "Invalid graph type specified: 'type_of_graph=%s' - Valid values: " \
        "'bipartite', 'constraint', 'variable'" % (type_of_graph,)

    assert isinstance(filename, (type(None), str)), \
        "Invalid value for filename: 'filename=%s' - filename " \
        "must be a string" % (filename,)

    # No assert statement for pos; the NetworkX function can handle issues with the pos argument

    # There is a possibility that the desired networkX graph of the model is already stored in the
    # CommunityMap object (because the networkX graph is required to create the CommunityMap object)
    if type_of_graph != self.type_of_community_map:
        # Use the generate_model_graph function to create a NetworkX graph of the given model (along with
        # number_component_map and constraint_variable_map, which will be used to help with drawing the graph)
        model_graph, number_component_map, constraint_variable_map = generate_model_graph(
            self.model, type_of_graph=type_of_graph, with_objective=self.with_objective,
            weighted_graph=self.weighted_graph,
            use_only_active_components=self.use_only_active_components)
    else:
        # This is the case where, as mentioned above, we can use the networkX graph that was made to create
        # the CommunityMap object
        model_graph = self.graph
        number_component_map = self.graph_node_mapping
        constraint_variable_map = self.constraint_variable_map

    # This line creates the "reverse" of the number_component_map above, since mapping the Pyomo
    # components to their nodes in the networkX graph is more convenient in this function
    component_number_map = ComponentMap(
        (comp, number) for number, comp in number_component_map.items())

    # Translate the Pyomo modeling components in community_map into the numbers that correspond to their
    # nodes in the NetworkX graph. A new dict is built (every entry is replaced anyway), so
    # self.community_map is never modified and no copy of it is required.
    numbered_community_map = {
        key: ([component_number_map[component] for component in community[0]],
              [component_number_map[component] for component in community[1]])
        for key, community in self.community_map.items()
    }

    # Constraints are in the first list of the tuples in the community map and variables are in the second
    # list; a bipartite graph draws both kinds of nodes
    if type_of_graph == 'bipartite':
        positions = (0, 1)
    elif type_of_graph == 'constraint':
        positions = (0,)
    else:
        positions = (1,)

    # One-dimensional node list: the nodes of every community, in community order
    node_list = [
        node
        for community in numbered_community_map.values()
        for position in positions
        for node in community[position]
    ]

    # In the community map, certain nodes may appear in multiple communities, and we have chosen to give
    # preference to the first community a node appears in. setdefault keeps the first key seen for a node.
    first_community = {}
    for community_key, community in numbered_community_map.items():
        for position in positions:
            for node in community[position]:
                first_community.setdefault(node, community_key)
    color_list = [first_community[node] for node in node_list]

    if pos is None:  # The case where the user has not provided their own layout
        if type_of_graph == 'bipartite':
            # Find top_nodes (one of the two "groups" of nodes in a bipartite graph), which will be used to
            # determine the graph layout
            if model_graph.number_of_nodes() > 0 and nx.is_connected(model_graph):
                # An index of 1 used because this tends to place constraint nodes on the left, which is
                # consistent with the else case
                top_nodes = nx.bipartite.sets(model_graph)[1]
            else:
                top_nodes = {node for node in model_graph.nodes() if node in constraint_variable_map}
            pos = nx.bipartite_layout(model_graph, top_nodes)
        else:
            # Note - there is no strong reason to choose spring layout; it just creates relatively clean graphs
            pos = nx.spring_layout(model_graph)

    # Define color_map
    # (plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9)
    color_map = plt.get_cmap('viridis', len(numbered_community_map))

    # Create the figure and draw the graph
    fig = plt.figure()
    nx.draw_networkx_nodes(model_graph, pos, nodelist=node_list, node_size=40, cmap=color_map,
                           node_color=color_list)
    nx.draw_networkx_edges(model_graph, pos, alpha=0.5)

    # Make the main title
    graph_type = type_of_graph.capitalize()
    community_map_type = self.type_of_community_map.capitalize()
    main_graph_title = "%s graph - colored using %s community map" % (graph_type, community_map_type)

    main_font_size = 14
    plt.suptitle(main_graph_title, fontsize=main_font_size)

    # Define a dict that will be used for the graph subtitle
    subtitle_naming_dict = {
        'bipartite': 'Nodes are variables and constraints & Edges are variables in a constraint',
        'constraint': 'Nodes are constraints & Edges are common variables',
        'variable': 'Nodes are variables & Edges are shared constraints'
    }

    # Make the subtitle
    subtitle_font_size = 11
    plt.title(subtitle_naming_dict[type_of_graph], fontsize=subtitle_font_size)

    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()

    # Return the figure and pos, the position dictionary used for the graph layout
    return fig, pos
def pairwise_plot(theta_values, theta_star=None, alpha=None, distributions=None,
                  axis_limits=None, title=None, add_obj_contour=True,
                  add_legend=True, filename=None):
    """
    Plot pairwise relationship for theta values, and optionally
    alpha-level confidence intervals and objective value contours

    Parameters
    ----------
    theta_values: DataFrame or tuple

        * If theta_values is a DataFrame, then it contains one column for each
          theta variable and (optionally) an objective value column ('obj')
          and columns that contains Boolean results from confidence interval
          tests (labeled using the alpha value). Each row is a sample.

          * Theta variables can be computed from ``theta_est_bootstrap``,
            ``theta_est_leaveNout``, and ``leaveNout_bootstrap_test``.
          * The objective value can be computed using the
            ``likelihood_ratio_test``.
          * Results from confidence interval tests can be computed using the
            ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``, and
            ``confidence_region_test``.

        * If theta_values is a tuple, then it contains a mean, covariance, and
          number of samples (mean, cov, n) where mean is a dictionary or
          Series (indexed by variable name), covariance is a DataFrame
          (indexed by variable name, one column per variable name), and n is
          an integer. The mean and covariance are used to create a
          multivariate normal sample of n theta values. The covariance can be
          computed using ``theta_est(calc_cov=True)``.

    theta_star: dict or Series, optional
        Estimated value of theta. The dictionary or Series is indexed by
        variable name. Theta_star is used to slice higher dimensional contour
        intervals in 2D
    alpha: float, optional
        Confidence interval value, if an alpha value is given and the
        distributions list is empty, the data will be filtered by True/False
        values using the column name whose value equals alpha (see results
        from ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``, and
        ``confidence_region_test``)
    distributions: list of strings, optional
        Statistical distribution used to define a confidence region,
        options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde, and
        'Rect' for rectangular. Confidence interval is a 2D slice, using
        linear interpolation at theta_star. When omitted (or None) no
        confidence region is drawn.
    axis_limits: dict, optional
        Axis limits in the format {variable: [min, max]}
    title: string, optional
        Plot title
    add_obj_contour: bool, optional
        Add a contour plot using the column 'obj' in theta_values.
        Contour plot is a 2D slice, using linear interpolation at theta_star.
    add_legend: bool, optional
        Add a legend to the plot
    filename: string, optional
        Filename used to save the figure

    Raises
    ------
    AssertionError
        If an argument has an unexpected type or value, or if there are no
        theta samples to plot.
    """
    # Resolve the default per call instead of sharing one list object
    # between invocations.
    if distributions is None:
        distributions = []

    assert isinstance(theta_values, (pd.DataFrame, tuple))
    assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
    assert isinstance(alpha, (type(None), int, float))
    assert isinstance(distributions, list)
    assert set(distributions).issubset(set(['MVN', 'KDE', 'Rect']))
    assert isinstance(axis_limits, (type(None), dict))
    assert isinstance(title, (type(None), str))
    assert isinstance(add_obj_contour, bool)
    assert isinstance(filename, (type(None), str))

    # If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
    if isinstance(theta_values, tuple):
        assert(len(theta_values) == 3)
        mean = theta_values[0]
        cov = theta_values[1]
        n = theta_values[2]

        if isinstance(mean, dict):
            mean = pd.Series(mean)
        theta_names = mean.index
        mvn_dist = stats.multivariate_normal(mean, cov)
        # Fixed random_state so that the generated sample is reproducible
        theta_values = pd.DataFrame(mvn_dist.rvs(n, random_state=1), columns=theta_names)

    assert(theta_values.shape[0] > 0)

    if isinstance(theta_star, dict):
        theta_star = pd.Series(theta_star)
    if isinstance(theta_star, pd.DataFrame):
        theta_star = theta_star.loc[0,:]

    # Theta columns are everything except 'obj' and the numeric (alpha) column labels
    theta_names = [col for col in theta_values.columns if (col not in ['obj'])
                   and (not isinstance(col, float)) and (not isinstance(col, int))]

    # Filter data by alpha
    if (alpha in theta_values.columns) and (len(distributions) == 0):
        # Element-wise comparison: builds the boolean row mask
        thetas = theta_values.loc[theta_values[alpha] == True, theta_names]
    else:
        thetas = theta_values[theta_names]

    if theta_star is not None:
        theta_star = theta_star[theta_names]

    legend_elements = []

    g = sns.PairGrid(thetas)

    # Plot histogram on the diagonal
    # Note: distplot is deprecated and will be removed in a future
    #       version of seaborn, use histplot.  distplot is kept for older
    #       versions of python.
    if check_min_version(sns, "0.11"):
        g.map_diag(sns.histplot)
    else:
        g.map_diag(sns.distplot, kde=False, hist=True, norm_hist=False)

    # Plot filled contours using all theta values based on obj
    if 'obj' in theta_values.columns and add_obj_contour:
        g.map_offdiag(_add_obj_contour, columns=theta_names, data=theta_values,
                      theta_star=theta_star)

    # Plot thetas
    g.map_offdiag(plt.scatter, s=10)
    legend_elements.append(matplotlib.lines.Line2D(
        [0], [0], marker='o', color='w', label='thetas',
        markerfacecolor='cadetblue', markersize=5))

    # Plot theta*
    if theta_star is not None:
        g.map_offdiag(_add_scatter, color='k', columns=theta_names, theta_star=theta_star)

        legend_elements.append(matplotlib.lines.Line2D(
            [0], [0], marker='o', color='w', label='theta*',
            markerfacecolor='k', markersize=6))

    # Plot confidence regions
    colors = ['r', 'mediumblue', 'darkgray']
    if (alpha is not None) and (len(distributions) > 0):

        if theta_star is None:
            print("""theta_star is not defined, confidence region slice will be plotted at the mean value of theta""")
            theta_star = thetas.mean()

        for i, dist in enumerate(distributions):
            # Cycle through the palette so that a list with more entries than
            # colors (e.g. repeated names) does not raise an IndexError
            color = colors[i % len(colors)]

            if dist == 'Rect':
                lb, ub = fit_rect_dist(thetas, alpha)
                g.map_offdiag(_add_rectangle_CI, color=color, columns=theta_names,
                              lower_bound=lb, upper_bound=ub)

            elif dist == 'MVN':
                mvn_dist = fit_mvn_dist(thetas)
                Z = mvn_dist.pdf(thetas)
                score = stats.scoreatpercentile(Z, (1-alpha)*100)
                g.map_offdiag(_add_scipy_dist_CI, color=color, columns=theta_names,
                              ncells=100, alpha=score, dist=mvn_dist,
                              theta_star=theta_star)

            elif dist == 'KDE':
                kde_dist = fit_kde_dist(thetas)
                Z = kde_dist.pdf(thetas.transpose())
                score = stats.scoreatpercentile(Z, (1-alpha)*100)
                g.map_offdiag(_add_scipy_dist_CI, color=color, columns=theta_names,
                              ncells=100, alpha=score, dist=kde_dist,
                              theta_star=theta_star)

            else:
                # Excluded by the validation above; no legend entry is added
                continue

            legend_elements.append(matplotlib.lines.Line2D(
                [0], [0], color=color, lw=1, label=dist))

    _set_axis_limits(g, axis_limits, thetas, theta_star)

    for ax in g.axes.flatten():
        ax.ticklabel_format(style='sci', scilimits=(-2,2), axis='both')

        if add_legend:
            # The legend is attached to a single axes of the grid only
            xvar, yvar, loc = _get_variables(ax, theta_names)
            if loc == (len(theta_names)-1,0):
                ax.legend(handles=legend_elements, loc='best', prop={'size': 8})
    if title:
        g.fig.subplots_adjust(top=0.9)
        g.fig.suptitle(title)

    # Work in progress
    # Plot lower triangle graphics in separate figures, useful for presentations
    lower_triangle_only = False
    if lower_triangle_only:
        for ax in g.axes.flatten():
            xvar, yvar, (xloc, yloc) = _get_variables(ax, theta_names)
            if xloc < yloc: # lower triangle
                ax.remove()

                ax.set_xlabel(xvar)
                ax.set_ylabel(yvar)

                fig = plt.figure()
                ax.figure=fig
                fig.axes.append(ax)
                fig.add_axes(ax)

                f, dummy = plt.subplots()
                bbox = dummy.get_position()
                ax.set_position(bbox)
                dummy.remove()
                plt.close(f)

                ax.tick_params(reset=True)

                if add_legend:
                    ax.legend(handles=legend_elements, loc='best', prop={'size': 8})

        plt.close(g.fig)

    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()
def make3dPlot(expr, numticks=30, show_plot=False):
    """
    Sample an expression and its relaxation surfaces on a 2D grid

    The first two variables yielded by ``identify_variables(expr)`` are used
    as the x and y axes. Each axis is sampled at ``numticks + 1`` evenly
    spaced points between the variable's ``lb`` and ``ub``. Results are
    stored in flat lists; grid point (i, j) lives at index
    ``i + (numticks + 1) * j``.

    Parameters
    ----------
    expr:
        Expression handed to ``mc`` (presumably a McCormick relaxation
        wrapper - confirm against its definition) and to ``value``
    numticks: int, optional
        Number of intervals along each axis
    show_plot: bool, optional
        If True, the sampled surfaces are drawn as 3D scatter plots and shown

    Returns
    -------
    tuple of four lists
        (concave surface values, convex surface values, concave affine plane
        values, convex affine plane values)
    """
    side = numticks + 1
    n_points = side ** 2

    cc_surface = [None] * n_points
    cv_surface = [None] * n_points
    func_vals = [None] * n_points
    grid_x = [None] * n_points
    grid_y = [None] * n_points
    cc_plane = [None] * n_points
    cv_plane = [None] * n_points

    eqn = mc(expr)
    var_iter = identify_variables(expr)
    x = next(var_iter)
    y = next(var_iter)

    x_step = (x.ub - x.lb) / numticks
    y_step = (y.ub - y.lb) / numticks
    xaxis = [x.lb + x_step * k for k in range(side)]
    yaxis = [y.lb + y_step * k for k in range(side)]

    # Making the affine tangent planes
    # (slopes, reference point and reference values are read once, before
    # the evaluation point of eqn is moved across the grid)
    cc_slope = eqn.subcc()
    cv_slope = eqn.subcv()
    x_ref = value(x)
    y_ref = value(y)
    cc_ref = eqn.concave()
    cv_ref = eqn.convex()

    for col, x_tick in enumerate(xaxis):
        eqn.changePoint(x, x_tick)
        for row, y_tick in enumerate(yaxis):
            flat = col + side * row

            # To Visualize Concave Affine Plane for different points
            cc_plane[flat] = (cc_slope[x] * (x_tick - x_ref)
                              + cc_slope[y] * (y_tick - y_ref)
                              + cc_ref)
            cv_plane[flat] = (cv_slope[x] * (x_tick - x_ref)
                              + cv_slope[y] * (y_tick - y_ref)
                              + cv_ref)
            grid_x[flat] = x_tick
            grid_y[flat] = y_tick

            # Move eqn to the current grid point before sampling the surfaces
            eqn.changePoint(y, y_tick)
            cc_surface[flat] = eqn.concave()
            cv_surface[flat] = eqn.convex()
            func_vals[flat] = value(expr)

    if show_plot:
        from mpl_toolkits.mplot3d import Axes3D
        assert Axes3D  # silence pyflakes

        # Plotting Solutions in 3D
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        ax.scatter(grid_x, grid_y, cv_surface, color='b')
        ax.scatter(grid_x, grid_y, func_vals, color='r')
        ax.scatter(grid_x, grid_y, cc_surface, color='b')

        # To Visualize Concave Affine Plane for different points
        ax.scatter(grid_x, grid_y, cv_plane, color='k')

        # Create a better view
        ax.view_init(10, 270)
        plt.show()

    return cc_surface, cv_surface, cc_plane, cv_plane