def initialize_chart(self):
    """Draw the scatter plot and record equivalent plotting code.

    Reads the x, y and (optional) color attributes from ``self.vis``, plots
    the NaN-free rows of ``self.data`` on ``self.ax`` and appends matching
    matplotlib source to ``self.code``.

    When a color attribute is present the figure is rebuilt through
    ``matplotlib_setup``: a "quantitative" attribute gets a "Blues" colorbar,
    any other type gets a legend limited to 16 entries whose labels are cut
    to 26 characters.
    """
    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]

    # Over-long axis labels keep their first 15 and last 10 characters.
    x_attr_abv = x_attr.attribute
    y_attr_abv = y_attr.attribute
    if len(x_attr.attribute) > 25:
        x_attr_abv = x_attr.attribute[:15] + "..." + x_attr.attribute[-10:]
    if len(y_attr.attribute) > 25:
        y_attr_abv = y_attr.attribute[:15] + "..." + y_attr.attribute[-10:]

    df = self.data.dropna()
    x_pts = df[x_attr.attribute]
    y_pts = df[y_attr.attribute]

    set_fig_code = ""
    plot_code = ""

    color_attr = self.vis.get_attr_by_channel("color")
    if len(color_attr) == 1:
        color_attr_name = color_attr[0].attribute
        color_attr_type = color_attr[0].data_type
        colors = df[color_attr_name].values
        plot_code += f"colors = df['{color_attr_name}'].values\n"

        # Map every distinct value to an integer code; the dict makes the
        # per-row lookup O(1) instead of a linear list.index() scan.
        unique = list(set(colors))
        code_of = {value: idx for idx, value in enumerate(unique)}
        vals = [code_of[value] for value in colors]

        if color_attr_type == "quantitative":
            self.fig, self.ax = matplotlib_setup(7, 5)
            set_fig_code = "fig, ax = plt.subplots(figsize=(7, 5))\n"
            self.ax.scatter(x_pts, y_pts, c=vals, cmap="Blues", alpha=0.5)
            plot_code += f"ax.scatter(x_pts, y_pts, c={vals}, cmap='Blues', alpha=0.5)\n"

            my_cmap = plt.cm.get_cmap("Blues")
            max_color = max(colors)
            sm = ScalarMappable(cmap=my_cmap, norm=plt.Normalize(0, max_color))
            sm.set_array([])
            cbar = plt.colorbar(sm, label=color_attr_name)
            cbar.outline.set_linewidth(0)

            plot_code += "my_cmap = plt.cm.get_cmap('Blues')\n"
            plot_code += (
                "sm = ScalarMappable( cmap=my_cmap, "
                f"norm=plt.Normalize(0, {max_color}))\n"
            )
            plot_code += f"cbar = plt.colorbar(sm, label='{color_attr_name}')\n"
            plot_code += "cbar.outline.set_linewidth(0)\n"
        else:
            # Legend labels: at most 16 entries, each cut to 26 chars + "...".
            unique = [str(value) for value in unique[:16]]
            unique = [
                label[:26] + "..." if len(label) > 26 else label
                for label in unique
            ]
            maxlen = max((len(label) for label in unique), default=0)

            # Long legend labels need a wider figure.
            if maxlen > 20:
                self.fig, self.ax = matplotlib_setup(9, 5)
                set_fig_code = "fig, ax = plt.subplots(figsize=(9, 5))\n"
            else:
                self.fig, self.ax = matplotlib_setup(7, 5)
                set_fig_code = "fig, ax = plt.subplots(figsize=(7, 5))\n"

            cmap = "Set1"
            if len(unique) > 9:
                cmap = "tab20c"

            scatter = self.ax.scatter(x_pts, y_pts, c=vals, cmap=cmap)
            # The colormap name must be quoted in the emitted code; unquoted
            # it would be read as an undefined variable.
            plot_code += f"scatter = ax.scatter(x_pts, y_pts, c={vals}, cmap='{cmap}')\n"

            self.ax.legend(
                handles=scatter.legend_elements(num=range(0, len(unique)))[0],
                labels=unique,
                title=color_attr_name,
                markerscale=2.0,
                bbox_to_anchor=(1.05, 1),
                loc="upper left",
                ncol=1,
                frameon=False,
                fontsize="13",
            )
            scatter.set_alpha(0.5)
            plot_code += (
                "ax.legend( handles=scatter.legend_elements("
                f"num=range(0, len({unique})))[0], labels={unique}, "
                f"title='{color_attr_name}', markerscale=2., "
                "bbox_to_anchor=(1.05, 1), loc='upper left', ncol=1, "
                "frameon=False, fontsize='13')\n"
            )
            plot_code += "scatter.set_alpha(0.5)\n"
    else:
        set_fig_code = "fig, ax = plt.subplots(figsize=(4.5, 4))\n"
        self.ax.scatter(x_pts, y_pts, alpha=0.5)
        plot_code += "ax.scatter(x_pts, y_pts, alpha=0.5)\n"

    self.ax.set_xlabel(x_attr_abv, fontsize="15")
    self.ax.set_ylabel(y_attr_abv, fontsize="15")

    self.code += "import numpy as np\n"
    self.code += "from math import nan\n"
    self.code += "from matplotlib.cm import ScalarMappable\n"
    # Drop NaN rows in the emitted code as well, so that x_pts/y_pts have the
    # same length as the colour codes embedded above.
    self.code += f"df = pd.DataFrame({str(self.data.to_dict())}).dropna()\n"
    self.code += set_fig_code
    self.code += f"x_pts = df['{x_attr.attribute}']\n"
    self.code += f"y_pts = df['{y_attr.attribute}']\n"
    self.code += plot_code
    self.code += f"ax.set_xlabel('{x_attr_abv}', fontsize='15')\n"
    self.code += f"ax.set_ylabel('{y_attr_abv}', fontsize='15')\n"
def create_vis(self, vis, standalone=True):
    """
    Render a Vis with matplotlib and return it in the configured output format.

    Parameters
    ----------
    vis: lux.vis.Vis
        Input Vis (with data)
    standalone: bool
        Flag to determine if outputted code uses user-defined variable names
        or can be run independently. It is not read by this method.

    Returns
    -------
    dict, str or None
        * ``{"config": <base64-encoded PNG>, "vislib": "matplotlib"}`` when
          ``self.output_type == "matplotlib"``.
        * The generated plotting code (``str``) when
          ``self.output_type == "matplotlib_code"``.
        * Whatever ``AltairRenderer().create_vis(vis, False)`` returns for
          "geographical" marks.
        * ``None`` for an unrecognised mark or any other ``output_type``.
    """
    # Lazy Evaluation for 2D Binning
    if vis.mark == "scatter" and vis._postbin:
        vis._mark = "heatmap"

        PandasExecutor.execute_2D_binning(vis)

    # If a column has a Period dtype, or contains Period objects, convert it back to Datetime
    if vis.data is not None:
        # Peeking at the first value is only possible when there are rows;
        # on an empty frame ``.iloc[0]`` would raise IndexError.
        has_rows = len(vis.data) > 0
        for attr in list(vis.data.columns):
            if pd.api.types.is_period_dtype(vis.data.dtypes[attr]) or (
                has_rows and isinstance(vis.data[attr].iloc[0], pd.Period)
            ):
                dateColumn = vis.data[attr]
                vis.data[attr] = pd.PeriodIndex(dateColumn.values).to_timestamp()
            # Interval columns are rendered through their string form.
            if pd.api.types.is_interval_dtype(vis.data.dtypes[attr]) or (
                has_rows and isinstance(vis.data[attr].iloc[0], pd.Interval)
            ):
                vis.data[attr] = vis.data[attr].astype(str)

    fig, ax = matplotlib_setup(4.5, 4)
    if vis.mark == "histogram":
        chart = Histogram(vis, fig, ax)
    elif vis.mark == "bar":
        chart = BarChart(vis, fig, ax)
    elif vis.mark == "scatter":
        chart = ScatterChart(vis, fig, ax)
    elif vis.mark == "line":
        chart = LineChart(vis, fig, ax)
    elif vis.mark == "heatmap":
        chart = Heatmap(vis, fig, ax)
    elif vis.mark == "geographical":
        # Delegated to the Altair renderer; release the unused figure first.
        plt.close(fig)
        return AltairRenderer().create_vis(vis, False)
    else:
        # Unknown mark: nothing to draw, so do not leave the figure open.
        plt.close(fig)
        return None

    if chart:
        plt.tight_layout()
        if lux.config.plotting_style and lux.config.plotting_backend in (
            "matplotlib",
            "matplotlib_code",
        ):
            chart.ax = lux.config.plotting_style(chart.fig, chart.ax)
        plt.tight_layout()

        # Serialise the figure as a base64 PNG, then free all figures.
        tmpfile = BytesIO()
        chart.fig.savefig(tmpfile, format="png")
        chart.chart = base64.b64encode(tmpfile.getvalue()).decode("utf-8")
        plt.clf()
        plt.close("all")

        if self.output_type == "matplotlib":
            return {"config": chart.chart, "vislib": "matplotlib"}
        if self.output_type == "matplotlib_code":
            if lux.config.plotting_style:
                import inspect
                import textwrap

                # Append the body of the user's style function: drop its
                # first and last source lines and strip the common leading
                # indentation, whatever its width.
                style_lines = inspect.getsource(lux.config.plotting_style).splitlines()
                chart.code += textwrap.dedent("\n".join(style_lines[1:-1]))
            chart.code += "\nfig"
            chart.code = chart.code.replace("\n\t\t", "\n")
            return chart.code
def initialize_chart(self):
    """Draw a horizontal bar chart and record equivalent plotting code.

    The attribute whose ``data_model`` is "measure" supplies the bar lengths;
    the other attribute supplies the bar labels. At most
    ``lux.config.number_of_bars`` bars are kept (the largest by measure) and
    a "+ N more ..." note is drawn when bars were dropped. With a color
    attribute, one ``barh`` series is drawn per distinct color value on a
    figure rebuilt through ``matplotlib_setup``.
    """
    self.tooltip = False
    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]

    # Deal with overlong string axes labels
    label_len = lux.config.label_len
    prefix_len = math.ceil(3.0 * label_len / 5.0)
    suffix_len = label_len - prefix_len

    def _abbreviate(attribute):
        # Work on the string form so non-string column names can be sliced.
        label = str(attribute)
        if len(label) > label_len:
            # Explicit start index: ``label[-0:]`` would return the whole
            # string when suffix_len is 0.
            return label[:prefix_len] + "..." + label[len(label) - suffix_len:]
        return label

    x_attr_abv = _abbreviate(x_attr.attribute)
    y_attr_abv = _abbreviate(y_attr.attribute)

    if x_attr.data_model == "measure":
        agg_title = get_agg_title(x_attr)
        measure_attr = x_attr.attribute
        bar_attr = y_attr.attribute
    else:
        agg_title = get_agg_title(y_attr)
        measure_attr = y_attr.attribute
        bar_attr = x_attr.attribute

    k = lux.config.number_of_bars
    n_bars = len(self.data.iloc[:, 0].unique())
    remaining_bars = 0
    if n_bars > k:  # Truncating to only top k
        remaining_bars = n_bars - k
        self.data = self.data.nlargest(k, measure_attr)

    df = self.data

    bar = df[bar_attr].apply(lambda x: str(x))
    bars = list(bar)
    measurements = list(df[measure_attr])

    plot_code = ""

    color_attr = self.vis.get_attr_by_channel("color")
    if len(color_attr) == 1:
        self.fig, self.ax = matplotlib_setup(6, 4)
        color_attr_name = color_attr[0].attribute
        colors = df[color_attr_name].values
        unique = list(set(colors))

        # Group bar labels and measures by color value.
        d_x = {value: [] for value in unique}
        d_y = {value: [] for value in unique}
        for color, label, measure in zip(colors, bars, measurements):
            d_x[color].append(label)
            d_y[color].append(measure)

        for group in unique:
            xval = d_x[group]
            yval = d_y[group]
            self.ax.barh(xval, yval, label=group)
            plot_code += f"ax.barh({xval},{yval}, label='{group}')\n"
        self.ax.legend(
            title=color_attr_name,
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            ncol=1,
            frameon=False,
        )
        plot_code += (
            f"ax.legend(title='{color_attr_name}', bbox_to_anchor=(1.05, 1), "
            "loc='upper left', ncol=1, frameon=False)\n"
        )
    else:
        self.ax.barh(bar, df[measure_attr], align="center")
        # Refer to the ``bars`` / ``measurements`` variables that the emitted
        # code defines below; interpolating the Series here would paste their
        # printed repr, which is not valid Python.
        plot_code += "ax.barh(bars, measurements, align='center')\n"

    if remaining_bars:
        # Drawn only now because the color branch rebinds ``self.ax``; a note
        # added earlier would sit on the axes that was replaced.
        self.ax.text(
            0.95,
            0.01,
            f"+ {remaining_bars} more ...",
            verticalalignment="bottom",
            horizontalalignment="right",
            transform=self.ax.transAxes,
            fontsize=11,
            fontweight="bold",
            color="#ff8e04",
        )

    # Tick labels longer than 10 characters are cut and suffixed with "...".
    y_ticks_abbev = df[bar_attr].apply(
        lambda x: str(x)[:10] + "..." if len(str(x)) > 10 else str(x)
    )
    self.ax.set_yticks(bars)
    self.ax.set_yticklabels(y_ticks_abbev)

    self.ax.set_xlabel(x_attr_abv)
    self.ax.set_ylabel(y_attr_abv)
    plt.gca().invert_yaxis()

    self.code += "import numpy as np\n"
    self.code += "from math import nan\n"
    self.code += f"df = pd.DataFrame({str(self.data.to_dict())})\n"
    self.code += "fig, ax = plt.subplots()\n"
    self.code += f"bars = df['{bar_attr}']\n"
    self.code += f"measurements = df['{measure_attr}']\n"
    self.code += plot_code
    self.code += f"ax.set_xlabel('{x_attr_abv}')\n"
    self.code += f"ax.set_ylabel('{y_attr_abv}')\n"
def initialize_chart(self):
    """Draw the scatter plot and record equivalent plotting code.

    Reads the x, y and (optional) color attributes from ``self.vis``, plots
    ``self.data`` on ``self.ax`` and appends matching matplotlib source to
    ``self.code``. With a color attribute the figure is rebuilt through
    ``matplotlib_setup(6, 5)``: a "quantitative" attribute gets a "Blues"
    colorbar, any other type gets a "Set1" categorical legend.
    """
    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]

    # Over-long axis labels keep their first 15 and last 10 characters.
    x_attr_abv = x_attr.attribute
    y_attr_abv = y_attr.attribute
    if len(x_attr.attribute) > 25:
        x_attr_abv = x_attr.attribute[:15] + "..." + x_attr.attribute[-10:]
    if len(y_attr.attribute) > 25:
        y_attr_abv = y_attr.attribute[:15] + "..." + y_attr.attribute[-10:]

    df = pd.DataFrame(self.data)
    x_pts = df[x_attr.attribute]
    y_pts = df[y_attr.attribute]

    plot_code = ""

    color_attr = self.vis.get_attr_by_channel("color")
    if len(color_attr) == 1:
        self.fig, self.ax = matplotlib_setup(6, 5)
        color_attr_name = color_attr[0].attribute
        color_attr_type = color_attr[0].data_type
        colors = df[color_attr_name].values
        plot_code += f"colors = df['{color_attr_name}'].values\n"

        # Map every distinct value to an integer code; the dict makes the
        # per-row lookup O(1) instead of a linear list.index() scan.
        unique = list(set(colors))
        code_of = {value: idx for idx, value in enumerate(unique)}
        vals = [code_of[value] for value in colors]

        if color_attr_type == "quantitative":
            self.ax.scatter(x_pts, y_pts, c=vals, cmap="Blues", alpha=0.5)
            plot_code += f"ax.scatter(x_pts, y_pts, c={vals}, cmap='Blues', alpha=0.5)\n"

            my_cmap = plt.cm.get_cmap("Blues")
            max_color = max(colors)
            sm = ScalarMappable(cmap=my_cmap, norm=plt.Normalize(0, max_color))
            sm.set_array([])
            cbar = plt.colorbar(sm, label=color_attr_name)
            cbar.outline.set_linewidth(0)

            plot_code += "my_cmap = plt.cm.get_cmap('Blues')\n"
            plot_code += (
                "sm = ScalarMappable( cmap=my_cmap, "
                f"norm=plt.Normalize(0, {max_color}))\n"
            )
            plot_code += f"cbar = plt.colorbar(sm, label='{color_attr_name}')\n"
            plot_code += "cbar.outline.set_linewidth(0)\n"
        else:
            scatter = self.ax.scatter(x_pts, y_pts, c=vals, cmap="Set1")
            plot_code += f"scatter = ax.scatter(x_pts, y_pts, c={vals}, cmap='Set1')\n"

            # Legend labels are the string form of each distinct value.
            unique = [str(value) for value in unique]
            self.ax.legend(
                handles=scatter.legend_elements(num=range(0, len(unique)))[0],
                labels=unique,
                title=color_attr_name,
                markerscale=2.0,
                bbox_to_anchor=(1.05, 1),
                loc="upper left",
                ncol=1,
                frameon=False,
            )
            scatter.set_alpha(0.5)
            plot_code += (
                "ax.legend( handles=scatter.legend_elements("
                f"num=range(0, len({unique})))[0], labels={unique}, "
                f"title='{color_attr_name}', markerscale=2., "
                "bbox_to_anchor=(1.05, 1), loc='upper left', ncol=1, "
                "frameon=False,)\n"
            )
            plot_code += "scatter.set_alpha(0.5)\n"
    else:
        self.ax.scatter(x_pts, y_pts, alpha=0.5)
        plot_code += "ax.scatter(x_pts, y_pts, alpha=0.5)\n"

    self.ax.set_xlabel(x_attr_abv)
    self.ax.set_ylabel(y_attr_abv)

    self.code += "import matplotlib.pyplot as plt\n"
    self.code += "import numpy as np\n"
    self.code += "from math import nan\n"
    self.code += "from matplotlib.cm import ScalarMappable\n"

    self.code += "fig, ax = plt.subplots()\n"
    self.code += f"x_pts = df['{x_attr.attribute}']\n"
    self.code += f"y_pts = df['{y_attr.attribute}']\n"

    self.code += plot_code
    self.code += f"ax.set_xlabel('{x_attr_abv}')\n"
    self.code += f"ax.set_ylabel('{y_attr_abv}')\n"
def initialize_chart(self):
    """Draw a line chart and record equivalent plotting code.

    Rows with a missing x or y value are dropped from ``self.data`` first.
    With a color attribute, one line is drawn per distinct color value on a
    figure rebuilt through ``matplotlib_setup(6, 4)``; otherwise a single
    line is drawn on ``self.ax``. The axis whose attribute has
    ``data_model == "measure"`` is labelled with ``get_agg_title`` (y is
    checked first), the other with the abbreviated attribute name.
    """
    self.tooltip = False  # tooltip looks weird for line chart
    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]

    # Over-long axis labels keep their first 15 and last 10 characters.
    x_attr_abv = x_attr.attribute
    y_attr_abv = y_attr.attribute
    if len(x_attr.attribute) > 25:
        x_attr_abv = x_attr.attribute[:15] + "..." + x_attr.attribute[-10:]
    if len(y_attr.attribute) > 25:
        y_attr_abv = y_attr.attribute[:15] + "..." + y_attr.attribute[-10:]

    self.data = self.data.dropna(subset=[x_attr.attribute, y_attr.attribute])

    df = pd.DataFrame(self.data)
    x_pts = df[x_attr.attribute]
    y_pts = df[y_attr.attribute]

    plot_code = ""

    color_attr = self.vis.get_attr_by_channel("color")
    if len(color_attr) == 1:
        self.fig, self.ax = matplotlib_setup(6, 4)
        color_attr_name = color_attr[0].attribute
        colors = df[color_attr_name].values
        unique = list(set(colors))

        d_x = {value: [] for value in unique}
        d_y = {value: [] for value in unique}
        # Walk the rows positionally. ``x_pts[i]`` would be a label lookup,
        # and dropna() above leaves gaps in the index labels.
        for color, x_val, y_val in zip(colors, x_pts, y_pts):
            d_x[color].append(x_val)
            d_y[color].append(y_val)

        for i, group in enumerate(unique):
            self.ax.plot(d_x[group], d_y[group], label=group)
            plot_code += (
                f"ax.plot( {d_x}[{unique}[{i}]], {d_y}[{unique}[{i}]], "
                f"label={unique}[{i}])\n"
            )
        self.ax.legend(
            title=color_attr_name,
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            ncol=1,
            frameon=False,
        )
        plot_code += (
            f"ax.legend( title='{color_attr_name}', bbox_to_anchor=(1.05, 1), "
            "loc='upper left', ncol=1, frameon=False,)\n"
        )
    else:
        self.ax.plot(x_pts, y_pts)
        plot_code += "ax.plot(x_pts, y_pts)\n"

    # The measure axis carries the aggregate title.
    if y_attr.data_model == "measure":
        x_label = x_attr_abv
        y_label = get_agg_title(y_attr)
    else:
        x_label = get_agg_title(x_attr)
        y_label = y_attr_abv
    self.ax.set_xlabel(x_label)
    self.ax.set_ylabel(y_label)

    self.code += "import numpy as np\n"
    self.code += "from math import nan\n"

    self.code += "fig, ax = plt.subplots()\n"
    self.code += f"x_pts = df['{x_attr.attribute}']\n"
    self.code += f"y_pts = df['{y_attr.attribute}']\n"

    self.code += plot_code
    self.code += f"ax.set_xlabel('{x_label}')\n"
    self.code += f"ax.set_ylabel('{y_label}')\n"
def initialize_chart(self):
    """Render the binned data as a heatmap and record the matching code.

    ``self.data`` is pivoted on "xBinStart" (rows) and "yBinStart" (columns).
    With a color attribute the cell values are that attribute, shown with
    the "viridis" colormap and a colorbar on a figure rebuilt through
    ``matplotlib_setup(6, 4)``. Otherwise the cells are the natural log of
    the "count" column, shown with "Blues". Equivalent source is appended to
    ``self.code``.
    """

    def shorten(name):
        # Labels over 25 characters keep their first 15 and last 10.
        return name[:15] + "..." + name[-10:] if len(name) > 25 else name

    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]
    x_label = shorten(x_attr.attribute)
    y_label = shorten(y_attr.attribute)

    binned = self.data
    color_attr = self.vis.get_attr_by_channel("color")
    has_color = len(color_attr) == 1

    if has_color:
        self.fig, self.ax = matplotlib_setup(6, 4)
        color_attr_name = color_attr[0].attribute
        color_map = "viridis"
        grid = pd.pivot_table(
            data=binned,
            index="xBinStart",
            values=color_attr_name,
            columns="yBinStart",
        )
        pivot_code = (
            "df = pd.pivot_table( data=df, index='xBinStart', "
            f"values='{color_attr_name}', columns='yBinStart')\n"
        )
    else:
        color_attr_name = ""
        color_map = "Blues"
        grid = pd.pivot_table(
            data=binned,
            index="xBinStart",
            values="count",
            columns="yBinStart",
        )
        # Counts are shown on a log scale.
        grid = grid.apply(lambda row: np.log(row), axis=1)
        pivot_code = (
            "df = pd.pivot_table( df, index='xBinStart', "
            "values='count', columns='yBinStart')\n"
            "df = df.apply(lambda x: np.log(x), axis=1)\n"
        )

    cells = grid.values

    plt.imshow(cells, cmap=color_map)
    self.ax.set_aspect("auto")
    plt.gca().invert_yaxis()

    if has_color:
        cbar = plt.colorbar(label=color_attr_name)
        cbar.outline.set_linewidth(0)
        colorbar_code = (
            f"cbar = plt.colorbar(label='{color_attr_name}')\n"
            "cbar.outline.set_linewidth(0)\n"
        )
    else:
        colorbar_code = ""

    self.ax.set_xlabel(x_label)
    self.ax.set_ylabel(y_label)
    self.ax.grid(False)

    # Emit the standalone script in the same order as the drawing calls.
    self.code += "".join(
        [
            "import numpy as np\n",
            "from math import nan\n",
            f"df = pd.DataFrame({str(self.data.to_dict())})\n",
            pivot_code,
            "df = df.values\n",
            "fig, ax = plt.subplots()\n",
            f"plt.imshow(df, cmap='{color_map}')\n",
            "ax.set_aspect('auto')\n",
            "plt.gca().invert_yaxis()\n",
            colorbar_code,
            f"ax.set_xlabel('{x_label}')\n",
            f"ax.set_ylabel('{y_label}')\n",
            "ax.grid(False)\n",
        ]
    )
def initialize_chart(self):
    """Draw a horizontal bar chart and record equivalent plotting code.

    The attribute whose ``data_model`` is "measure" supplies the bar lengths;
    the other attribute supplies the bar labels. Only the 10 largest bars are
    kept, and a "+ N more ..." note is drawn when bars were dropped. With a
    color attribute, one ``barh`` series is drawn per distinct color value on
    a figure rebuilt through ``matplotlib_setup(6, 4)``.
    """
    self.tooltip = False
    x_attr = self.vis.get_attr_by_channel("x")[0]
    y_attr = self.vis.get_attr_by_channel("y")[0]

    # Over-long axis labels keep their first 15 and last 10 characters.
    x_attr_abv = x_attr.attribute
    y_attr_abv = y_attr.attribute
    if len(x_attr.attribute) > 25:
        x_attr_abv = x_attr.attribute[:15] + "..." + x_attr.attribute[-10:]
    if len(y_attr.attribute) > 25:
        y_attr_abv = y_attr.attribute[:15] + "..." + y_attr.attribute[-10:]

    if x_attr.data_model == "measure":
        agg_title = get_agg_title(x_attr)
        measure_attr = x_attr.attribute
        bar_attr = y_attr.attribute
    else:
        agg_title = get_agg_title(y_attr)
        measure_attr = y_attr.attribute
        bar_attr = x_attr.attribute

    k = 10
    self._topkcode = ""
    n_bars = len(self.data.iloc[:, 0].unique())
    remaining_bars = 0
    if n_bars > k:  # Truncating to only top k
        remaining_bars = n_bars - k
        self.data = self.data.nlargest(k, measure_attr)
        # NOTE(review): this snippet is Altair code even though the chart is
        # drawn with matplotlib, and nothing in this method reads it --
        # confirm whether it is still needed.
        self._topkcode = (
            "text = alt.Chart(visData).mark_text( x=155, y=142, "
            'align="right", color = "#ff8e04", fontSize = 11, '
            f'text=f"+ {remaining_bars} more ..." \n'
            ") chart = chart + text\n"
        )

    df = pd.DataFrame(self.data)

    bars = df[bar_attr].apply(lambda x: str(x))
    measurements = df[measure_attr]

    plot_code = ""

    color_attr = self.vis.get_attr_by_channel("color")
    if len(color_attr) == 1:
        self.fig, self.ax = matplotlib_setup(6, 4)
        color_attr_name = color_attr[0].attribute
        colors = df[color_attr_name].values
        unique = list(set(colors))

        d_x = {value: [] for value in unique}
        d_y = {value: [] for value in unique}
        # Walk the rows positionally. ``bars[i]`` would be a label lookup,
        # and nlargest() above keeps the original index labels.
        for color, label, measure in zip(colors, bars, measurements):
            d_x[color].append(label)
            d_y[color].append(measure)

        for i, group in enumerate(unique):
            self.ax.barh(d_x[group], d_y[group], label=group)
            plot_code += (
                f"ax.barh({d_x}[{unique}[{i}]], {d_y}[{unique}[{i}]], label={unique}[{i}])\n"
            )
        self.ax.legend(
            title=color_attr_name,
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            ncol=1,
            frameon=False,
        )
        plot_code += (
            f"ax.legend( title='{color_attr_name}', bbox_to_anchor=(1.05, 1), "
            "loc='upper left', ncol=1, frameon=False,)\n"
        )
    else:
        self.ax.barh(bars, measurements, align="center")
        plot_code += "ax.barh(bars, measurements, align='center')\n"

    if remaining_bars:
        # Drawn only now because the color branch rebinds ``self.ax``; a note
        # added earlier would sit on the axes that was replaced.
        self.ax.text(
            0.95,
            0.01,
            f"+ {remaining_bars} more ...",
            verticalalignment="bottom",
            horizontalalignment="right",
            transform=self.ax.transAxes,
            fontsize=11,
            fontweight="bold",
            color="#ff8e04",
        )

    # Tick labels longer than 10 characters are cut and suffixed with "...".
    y_ticks_abbev = df[bar_attr].apply(
        lambda x: str(x)[:10] + "..." if len(str(x)) > 10 else str(x)
    )
    self.ax.set_yticks(bars)
    self.ax.set_yticklabels(y_ticks_abbev)

    self.ax.set_xlabel(x_attr_abv)
    self.ax.set_ylabel(y_attr_abv)
    plt.gca().invert_yaxis()

    self.code += "import numpy as np\n"
    self.code += "from math import nan\n"

    self.code += "fig, ax = plt.subplots()\n"
    self.code += f"bars = df['{bar_attr}']\n"
    self.code += f"measurements = df['{measure_attr}']\n"

    self.code += plot_code
    self.code += f"ax.set_xlabel('{x_attr_abv}')\n"
    self.code += f"ax.set_ylabel('{y_attr_abv}')\n"