def generate_align_vs_year(data_frame, output_folder, file_name):
    """
    Draws a line chart of character counts per year, one line per alignment,
    and saves it as a PNG image.

    The image is written to ``<output_folder>/figures/<file_name>.png`` with
    the selenium backend, using the module-level ``driver``. A confirmation
    is printed when the module-level ``verbose`` flag is truthy.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; the chart reads its ``year`` and ``align`` fields
    output_folder : str
        output folder path to save the chart
    file_name : str
        file name (without extension) for the generated chart image

    Returns:
    -----------
    None
    """
    target = output_folder + "/figures/" + file_name + '.png'

    year_axis = alt.X('year', title='Year(1935-2013)',
                      axis=alt.Axis(format='t'))
    count_axis = alt.Y('count()', title="Character Count")
    alignment_colour = alt.Color("align", title="Alignment")

    base = alt.Chart(data_frame, title="Alignment over Time")
    align_vs_year = (
        base.mark_line()
        .encode(year_axis, y=count_axis, color=alignment_colour)
        .properties(height=300, width=500)
    )

    save(align_vs_year, target, method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs year chart created, saved to " + target)
def save_figures(figures: dict, save_dir: str, ext: str = 'svg'):
    """Write every figure in a dict to ``save_dir`` as ``<name>.<ext>``

    Parameters
    ----------
    figures : dict
        mapping of {name: Altair figure obj}
    save_dir : str
        directory the figures are written into; created (with parents)
        when it does not exist yet
    ext : str
        file extension appended to each name

    Raises
    ------
    Whatever ``save`` raises. The failing path is logged first and the
    error is re-raised, so remaining figures are not attempted.
    """
    out_dir = Path(save_dir)

    # make sure the target directory is there before writing anything
    if not out_dir.exists():
        out_dir.mkdir(parents=True)
        log.info(f'Created dir: {out_dir}')

    for fig_name, fig_obj in figures.items():
        target = out_dir / f'{fig_name}.{ext}'
        try:
            save(fig_obj, str(target))
        except BaseException:
            # log where it went wrong, then stop: a failed save is fatal
            log.error(f'Could not save figure at: {target}')
            raise
def tune_regression_tree(self, sample_size, hparams=['max_features', 'max_depth'], folder="."):
    """
    Sweeps regression-tree hyperparameters and saves one accuracy plot per
    tuned parameter.

    Input:
        int sample_size - number of models trained for every candidate
            value. More samples take proportionally more time.
    Optional Input:
        list of strings hparams - which parameters to tune. Recognised
            elements are "max_features" and "max_depth"; both are tuned
            by default.
        folder - folder the plots are written into. Defaults to the
            current folder. Do not include a trailing slash.
    Output:
        Saves "regression_tree_features.html" and/or
        "regression_tree_depth.html" inside `folder`.
    """
    if 'max_features' in hparams:
        results = pd.DataFrame(
            columns=['Number of Features', 'test_acc', 'train_acc'])
        candidates = range(self._n + 3, self._features.shape[1], self._n)
        row = 0
        for n_features in candidates:
            for _ in range(sample_size):
                train_score, test_score = self.train_regression_tree(
                    max_features=n_features, max_depth=10, model_get=False)
                results.loc[row] = pd.Series({
                    'Number of Features': n_features,
                    'train_acc': train_score,
                    'test_acc': test_score
                })
                row += 1
        save(self._plot_accuracy(results, 'Number of Features'),
             folder + "/regression_tree_features.html")

    if 'max_depth' in hparams:
        results = pd.DataFrame(
            columns=['Number of Levels Deep', 'test_acc', 'train_acc'])
        row = 0
        for depth in range(2, 20, 2):
            for _ in range(sample_size):
                train_score, test_score = self.train_regression_tree(
                    max_depth=depth, model_get=False)
                results.loc[row] = pd.Series({
                    'Number of Levels Deep': depth,
                    'train_acc': train_score,
                    'test_acc': test_score
                })
                row += 1
        save(self._plot_accuracy(results, 'Number of Levels Deep'),
             folder + "/regression_tree_depth.html")
def test_save_chart_method(
    spec: JSONDict, fmt: str, method: Union[str, Type[Saver]]
) -> None:
    """Check save() per method: supported formats produce valid output,
    unsupported ones raise ValueError."""
    fp: Union[io.BytesIO, io.StringIO] = (
        io.BytesIO() if fmt in ("png", "pdf") else io.StringIO()
    )

    # Resolve the saver class whose format table applies to `method`.
    named_savers = {"node": NodeSaver, "selenium": SeleniumSaver}
    if isinstance(method, str) and method in named_savers:
        saver_cls = named_savers[method]
    elif isinstance(method, type):
        saver_cls = method
    else:
        raise ValueError(f"unrecognized method: {method}")
    valid_formats: Dict[str, List[str]] = saver_cls.valid_formats

    if fmt in valid_formats["vega-lite"]:
        save(spec, fp, fmt=fmt, method=method)
        check_output(fp.getvalue(), fmt)
    else:
        with pytest.raises(ValueError):
            save(spec, fp, fmt=fmt, method=method)
def tune_neural_net(self, sample_size, hparams=['max_iter', 'hidden_layer_sizes'], folder="."):
    """
    Sweeps neural-network hyperparameters and saves one accuracy plot per
    tuned parameter.

    Input:
        int sample_size - number of models trained for every candidate
            value. More samples take proportionally more time.
    Optional Input:
        list of strings hparams - which parameters to tune. Recognised
            elements are "max_iter" and "hidden_layer_sizes"; both are
            tuned by default.
        folder - folder the plots are written into. Defaults to the
            current folder. Do not include a trailing slash.
    Output:
        Saves "neural_net_iterations.html" and/or
        "neural_net_layers.html" inside `folder`.
    """
    if 'max_iter' in hparams:
        results = pd.DataFrame(
            columns=['Number of Iterations', 'test_acc', 'train_acc'])
        row = 0
        for n_iter in range(200, 2001, 200):
            for _ in range(sample_size):
                train_score, test_score = self.train_neural_net(
                    max_iter=n_iter, model_get=False)
                results.loc[row] = pd.Series({
                    'Number of Iterations': n_iter,
                    'train_acc': train_score,
                    'test_acc': test_score
                })
                row += 1
        save(self._plot_accuracy(results, 'Number of Iterations'),
             folder + "/neural_net_iterations.html")

    if 'hidden_layer_sizes' in hparams:
        results = pd.DataFrame(
            columns=['Hidden Layer Configuration', 'test_acc', 'train_acc'])
        configurations = [
            (50, ), (100, ),
            (50, 50), (100, 50), (100, 100),
            (100, 50, 50), (100, 100, 50), (100, 100, 100),
        ]
        row = 0
        for layout in configurations:
            for _ in range(sample_size):
                train_score, test_score = self.train_neural_net(
                    hidden_layer_sizes=layout, model_get=False)
                results.loc[row] = pd.Series({
                    'Hidden Layer Configuration': layout,
                    'train_acc': train_score,
                    'test_acc': test_score
                })
                row += 1
        save(self._plot_accuracy(results, 'Hidden Layer Configuration'),
             folder + "/neural_net_layers.html")
def test_embed_options_save_html(spec: JSONDict, inline: bool, embed_options: JSONDict) -> None:
    """Embed options set on the Altair renderer must appear in saved HTML."""
    buffer = io.StringIO()
    with alt.renderers.set_embed_options(**embed_options):
        save(spec, buffer, "html", inline=inline)

    expected_snippet = f"const embedOpt = {json.dumps(embed_options or {})};"
    assert expected_snippet in buffer.getvalue()
def generate_align_vs_features(data_frame, output_folder, file_name):
    """
    Draws a repeated circle chart of character counts for each categorical
    feature, coloured by alignment, and saves it as a PNG image.

    The image is written to ``<output_folder>/figures/<file_name>.png`` with
    the selenium backend, using the module-level ``driver``. A confirmation
    is printed when the module-level ``verbose`` flag is truthy.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; the chart reads ``align`` plus the repeated features
    output_folder : str
        output folder path to save the chart
    file_name : str
        file name (without extension) for the generated chart image

    Returns:
    -----------
    None
    """
    target = output_folder + "/figures/" + file_name + '.png'
    features = ['id', 'eye', 'hair', 'sex', 'gsm', 'publisher']

    feature_axis = alt.Y(alt.repeat(), type='ordinal')
    count_axis = alt.X('count()', title="Character Count")
    bubble_size = alt.Size('count()', legend=alt.Legend(title="Characters"))
    alignment_colour = alt.Color("align", legend=alt.Legend(title="Alignment"))

    single_panel = (
        alt.Chart(data_frame)
        .mark_circle()
        .encode(feature_axis, count_axis,
                size=bubble_size, color=alignment_colour)
        .properties(height=300, width=200)
    )
    # one panel per feature, laid out three to a row
    align_vs_features = single_panel.repeat(repeat=features, columns=3)

    save(align_vs_features, target, method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs features chart created, saved to " + target)
def test_infer_format(spec: JSONDict) -> None:
    """save() without an explicit fmt must infer HTML from a .html file."""
    with temporary_filename(suffix=".html") as path:
        with open(path, "w") as sink:
            save(spec, sink)
        with open(path) as source:
            written = source.read()

    assert written.strip().startswith("<!DOCTYPE html>")
def generate_align_vs_appearances(data_frame, output_folder, file_name):
    """
    Draws a box plot of appearances per alignment and saves it as a PNG image.

    Rows containing missing values are dropped before plotting. The image is
    written to ``<output_folder>/figures/<file_name>.png`` with the selenium
    backend, using the module-level ``driver``. A confirmation is printed
    when the module-level ``verbose`` flag is truthy.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; the chart reads its ``appearances`` and ``align`` fields
    output_folder : str
        output folder path to save the chart
    file_name : str
        file name (without extension) for the generated chart image

    Returns:
    -----------
    None
    """
    target = output_folder + "/figures/" + file_name + '.png'

    complete_rows = data_frame.dropna()
    base = alt.Chart(complete_rows, title="Character Appearances by Alignment")
    boxes = base.mark_boxplot().encode(
        alt.X('appearances:Q', title='Appearances'),
        y=alt.Y('align:O', title="Alignment"),
        color=alt.Color("align", title="Alignment"),
        size='count()',
    )
    align_vs_appearances = boxes.properties(height=300, width=500).interactive()

    save(align_vs_appearances, target, method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs appearances chart created, saved to " + target)
def animate_bezier(
    xs: np.ndarray,
    ys: np.ndarray,
    step: float,
    filename: str = "bezier",
    download_folder: str = expanduser("~") + "/Downloads/",
) -> None:
    """Build the interactive Bezier visualisation and save it as HTML.

    Parameters
    ----------
    xs, ys : np.ndarray
        x and y coordinates of the control points (paired by position).
    step : float
        Spacing of the curve parameter ``t`` sampled over ``[0, 1]``.
    filename : str
        Output file name without extension.
    download_folder : str
        Folder the ``<filename>.html`` file is written into.
    """
    ts = np.arange(0, 1 + step, step)
    # np.round_ was removed in NumPy 2.0; np.round is the supported spelling.
    # Rounding to the precision of `step` removes float noise from arange.
    ts = np.round(ts, decimals=num_decimals(step))

    # One "#rrggbb" hex colour per control point.
    colours = [
        "#%02x%02x%02x" % (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))
        for rgb in color_palette("rocket", len(xs))
    ]

    dfs = create_df_dict(np.vstack((xs, ys)).T, ts)
    chart = create_visualisation(dfs, ts, colours)

    # Overlay a faint segment between each pair of consecutive rows of dfs[1].
    # NOTE(review): dfs[1] presumably holds the sampled curve points - confirm
    # against create_df_dict.
    curve = dfs[1]
    for i in range(len(curve) - 1):
        start = curve.iloc[i]
        end = curve.iloc[i + 1]
        segment_xs = np.hstack((start.loc["x"], end.loc["x"]))
        segment_ys = np.hstack((start.loc["y"], end.loc["y"]))
        chart += draw_curve_segment(segment_xs, segment_ys, colours[0], 0.2)

    chart = chart.interactive()
    save(chart, f"{download_folder}/{filename}.html")
def graph_tp(data):
    """Plot throughput against the number of clients and save it as a PNG.

    ``data`` supplies the throughput values; the x values come from the
    module-level ``num_clients``. The chart is written to
    ``tmp/sync_hotstuff_throughput.png``.
    """
    out_path = "tmp/sync_hotstuff_throughput.png"
    # The column name doubles as the axis title shown on the chart.
    throughput_col = 'Throughput (Operations/Second)'

    frame = pd.DataFrame({
        'Number of Clients': num_clients,
        throughput_col: data,
    })
    chart = alt.Chart(frame).mark_line().encode(
        x='Number of Clients:Q',
        y=f'{throughput_col}:Q',
    )
    save(chart, out_path)
def graph_lat(data):
    """Plot latency against the number of clients and save it as a PNG.

    ``data`` supplies the latency values; the x values come from the
    module-level ``num_clients``. The chart is written to
    ``tmp/sync_hotstuff_latency.png``.
    """
    out_path = "tmp/sync_hotstuff_latency.png"

    frame = pd.DataFrame({
        'Number of Clients': num_clients,
        'Latency': data,
    })
    chart = alt.Chart(frame).mark_line().encode(
        x='Number of Clients:Q',
        y='Latency:Q',
    )
    save(chart, out_path)
def test_embed_options_save_html_override(spec: JSONDict) -> None:
    """embed_options passed to save() must win over the renderer's options."""
    explicit_options: JSONDict = {"renderer": "svg"}
    renderer_options: JSONDict = {"padding": 20}

    buffer = io.StringIO()
    with alt.renderers.set_embed_options(**renderer_options):
        save(spec, buffer, "html", embed_options=explicit_options)

    expected_snippet = f"const embedOpt = {json.dumps(explicit_options)};"
    assert expected_snippet in buffer.getvalue()
def draw_rerand_trajectory(source):
    """Draw the trajectory chart for ``source`` and save it to ``chart.html``.

    ``source`` is indexed like a DataFrame and must provide the columns
    ``name``, ``time``, ``leaks`` and ``ub``. The chart layers, per ``name``:

    * a dashed line over ``time``,
    * a grey circle at every row,
    * a larger black circle at rows where ``ub == 1``,
    * the ``leaks`` value as a text label next to every row.
    """
    upperbounds = source[source['ub'] == 1][['name', 'time', 'leaks']]

    lines = alt.Chart(
        source,
        height=600,
        width=900,
    ).mark_line(strokeDash=[5, 4]).encode(
        y='name',
        # alt.X is the channel class that belongs on the x encoding.
        x=alt.X('time',
                title='time (second)',
                axis=alt.Axis(tickMinStep=0.5)),
        color=alt.Color('name', legend=None))

    circles = alt.Chart(source).mark_circle(
        color='lightslategray',
        opacity=1.0,
        size=80.0).encode(y='name', x='time')

    # Rows flagged as upper bounds get a larger black marker drawn on top.
    circles2 = alt.Chart(upperbounds).mark_circle(
        color='black',
        opacity=1.0,
        size=100.0).encode(y='name', x='time')

    annotation = alt.Chart(source).mark_text(
        align='left',
        baseline='middle',
        opacity=1.0,
        fontSize=18,
        dx=-3.5,
        dy=-13.0).encode(x='time', y='name', text='leaks')

    chart = alt.layer(
        lines, circles, circles2, annotation,
    ).configure_view(stroke='transparent').configure_axis(
        labelFontSize=22,
        titleFontSize=22,
        tickCount=20)

    save(chart, "chart.html", scale_factor=2.0)
def validation_plot():
    """Plot the validation results: rate/latency curves and latency CDFs.

    Reads one result file per (rate, n_servers, repeat) combination, using the
    module-level ``repeats``. If ``files_exist`` reports the set incomplete
    the function returns without plotting. Otherwise it writes
    ``figures/validation_rate_latency.pdf`` and ``figures/validation_cdf.pdf``,
    each a vertical stack with one chart per repeat.
    """
    print("Plotting validation")

    # 1, then 2000..30000 in steps of 2000.
    rates = [1] + list(range(2000, 30001, 2000))
    runs = [
        (rate, n_servers, repeat)
        for rate in rates
        for n_servers in [3, 5, 7, 9]
        for repeat in range(repeats)
    ]

    def result_path(rate, n_servers, repeat):
        # Single source of truth for the result file naming scheme.
        return f"results/res_etcd.simple.go.none.nn_{n_servers}.nc_1.write_ratio_1.mtbf_1.rate_{rate}.duration_60.tag_repeat-{repeat}.res"

    files = [result_path(*run) for run in runs]
    if not files_exist(files):
        return

    # Rate Latency
    validation_data = pd.concat([
        pf.read_in_res(
            result_path(rate, n_servers, repeat),
            {
                'repeat': repeat,
                'rate': rate,
                'n_servers': n_servers
            })
        for rate, n_servers, repeat in runs
    ], ignore_index=True)

    rate_lat_res = achieved_rate_preprocess(validation_data)

    chart = alt.vconcat()
    for repeat in range(repeats):
        chart = alt.vconcat(
            chart,
            achieved_rate_plot(rate_lat_res[rate_lat_res['repeat'] == repeat], 100))
    alts.save(chart, "figures/validation_rate_latency.pdf")

    # CDF (only the runs at rate 10000)
    cdf_data = validation_data[validation_data['rate'] == 10000]

    group_by = ['n_servers', 'repeat']
    cdfs = pd.concat([
        process(group, dict(zip(group_by, params)), cdf)
        for params, group in cdf_data.groupby(group_by)
    ])

    chart = alt.vconcat()
    for repeat in range(repeats):
        # NOTE(review): unlike the rate/latency plot above, this chart is
        # built from all of `cdfs` rather than the rows for `repeat`, so every
        # panel shows the same data - confirm whether a per-repeat filter was
        # intended.
        rep_chart = alt.Chart(cdfs).mark_line(clip=True).encode(
            x=alt.X('latency:Q',
                    axis=alt.Axis(title='Latency (ms)'),
                    scale=alt.Scale(domain=[0, 100])),
            y=alt.Y('percentile:Q',
                    axis=alt.Axis(title='Cumulative fraction')),
            color=alt.Color('n_servers:N',
                            legend=None)).properties(height=130)
        chart = alt.vconcat(chart, rep_chart)
    alts.save(chart, "figures/validation_cdf.pdf")
def test_save_spec(spec: JSONDict, fmt: str) -> None:
    """Saving a raw spec dict must yield valid output for the given format."""
    # png and pdf are written as bytes; every other format as text
    fp: Union[io.BytesIO, io.StringIO] = (
        io.BytesIO() if fmt in ("png", "pdf") else io.StringIO()
    )
    save(spec, fp, fmt=fmt)
    check_output(fp.getvalue(), fmt)
def test_save_chart(chart: alt.TopLevelMixin, fmt: str) -> None:
    """Saving an Altair chart must yield valid output for the given format."""
    # png and pdf are written as bytes; every other format as text
    fp: Union[io.BytesIO, io.StringIO] = (
        io.BytesIO() if fmt in ("png", "pdf") else io.StringIO()
    )
    save(chart, fp, fmt=fmt)
    check_output(fp.getvalue(), fmt)
def test_html_inline(spec: JSONDict, inline: bool) -> None:
    """Inline HTML must not reference the CDN; non-inline HTML must."""
    buffer = io.StringIO()
    save(spec, buffer, fmt="html", inline=inline)

    references_cdn = "https://cdn.jsdelivr.net" in buffer.getvalue()
    assert references_cdn != bool(inline)
def save_altair_plot(fig: alt.Chart) -> None:
    """Save ``fig`` as a PNG inside ``FIGURES``.

    The file name comes from ``png_name(fig)``. Rendering uses the selenium
    method with a scale factor of 6.
    """
    target = FIGURES.joinpath(png_name(fig))
    save(chart=fig, fp=str(target), fmt="png", method="selenium", scale_factor=6.0)
def save_altair(fig, name, driver, path=fig_path):
    """Save an altair figure twice: as PNG under ``png/`` and HTML under ``html/``.

    The PNG is rendered with the given selenium ``driver`` at scale factor 5;
    the HTML is written with the figure's own ``save`` method. ``path`` is
    printed first.
    """
    print(path)

    png_target = f"{path}/png/{name}.png"
    html_target = f"{path}/html/{name}.html"

    save(fig, png_target, method="selenium", webdriver=driver, scale_factor=5)
    fig.save(html_target)
def test_save_chart_data_warning(chart: alt.TopLevelMixin) -> None:
    """save() must emit exactly one UserWarning under the 'json' transformer."""
    expected_prefix = (
        "save() may not function properly with the 'json' data transformer"
    )

    buffer = io.StringIO()
    with alt.data_transformers.enable("json"), pytest.warns(UserWarning) as record:
        save(chart, buffer, fmt="html")

    assert len(record) == 1
    warning_text = record[0].message.args[0]
    assert warning_text.startswith(expected_prefix)
def save_chart(
    chart: alt.Chart, base_path: Union[Path, str], filetypes: List[str], method=None
):
    """Save ``chart`` once per file type as ``<base_path>.<filetype>``.

    Parameters
    ----------
    chart : alt.Chart
        Chart to save.
    base_path : Path or str
        Output path without extension.
    filetypes : list of str
        Extensions to write, e.g. ``["png", "svg"]``.
    method : optional
        When ``"node"``, svg and pdf files are saved with the node method.
        Every other file, and every other value of ``method``, is saved with
        ``method=None``.
    """
    base_path = str(base_path)
    use_node = method == "node"
    for filetype in filetypes:
        path = base_path + "." + filetype
        # Decide per file; overwriting `method` itself would drop the "node"
        # request for every file after the first non-svg/pdf type.
        file_method = "node" if use_node and filetype in ("svg", "pdf") else None
        altair_saver.save(chart, safe_file(path), method=file_method)
def push_report(service):
    """Build the monthly spending report image and e-mail it to the user.

    For every company in the module-level ``params`` that is not in
    ``IGNORE_LIST`` and for which ``create_dump`` succeeds, the monthly data
    is loaded and charted. Companies whose data cannot be loaded or coerced
    are reported on stdout and skipped. An aggregate stacked chart is placed
    above the per-company charts, the stack is saved to ``data/report.png``
    next to this file, and the rendered ``report.html`` template is sent with
    ``push_email``.

    Parameters
    ----------
    service
        API client exposing ``users().getProfile(userId='me')``
        (presumably a Gmail API service object - confirm against the caller).

    Returns
    -------
    None. Returns early, without sending mail, when no company produced data.
    """
    user_email = service.users().getProfile(
        userId='me').execute()['emailAddress']
    print(user_email)

    y_bottom = np.zeros(12)  # running total per month across all companies
    charts = []
    frames = []
    for company_name in params:
        company = params[company_name]
        if company_name in IGNORE_LIST or not create_dump(service, company):
            continue
        try:
            x, y = getData(company_name, company['fname'])
            x, y = coerceData(x, y)
        except Exception as e:
            # Best effort: one bad company must not block the whole report.
            print(e)
            continue
        print(company_name, y)
        y_bottom = np.add(y_bottom, y)
        source = pd.DataFrame({
            'month': x,
            'spent': y,
            'company': [company_name] * 12
        })
        chart = alt.Chart(source).mark_bar(size=15).encode(
            x=alt.X('month', title=''),
            y=alt.Y('spent',
                    title='Amount spent (₹)')).properties(title=company_name, )
        charts.append(chart)
        frames.append(source)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df1 = pd.concat(frames) if frames else pd.DataFrame()
    if not df1.size:
        print('No data found for ' + user_email)
        return

    stackedchart = alt.Chart(df1).mark_bar(size=15).encode(
        alt.X('month', title=''),
        y=alt.Y('sum(spent)', title='Amount spent (₹)'),
        color='company').properties(title='Aggregate Monthly Spending')
    charts.insert(0, stackedchart)
    repchart = alt.VConcatChart(vconcat=charts)
    save(repchart,
         os.path.dirname(os.path.abspath(__file__)) + '/data/report.png',
         scale_factor=1.5)

    with app.app_context():
        context = {'amount': np.sum(y_bottom)}
        email_content = render_template('report.html', **context)
        push_email(email_content, user_email)
def alt_to_latex(chart, label, caption, path_tex, path_figure, path_figure_reporting='figures/charts', width='width=0.8\\textwidth', position='h'):
    """Write the LaTeX snippet for a chart, save the chart, and return it.

    All arguments except ``chart`` are forwarded to ``create_tex``. The path
    that ``create_tex`` returns is where the chart is saved. ``chart`` is
    returned unchanged.
    """
    figure_file = create_tex(
        label,
        caption,
        path_tex,
        path_figure,
        path_figure_reporting,
        width,
        position,
    )
    save(chart, figure_file)
    return chart
def save_plots(output_dir, plots_dict):
    """Save every plot in ``plots_dict`` to ``output_dir + <key>`` via selenium.

    Parameters
    ----------
    output_dir : str
        Path prefix for the output files. Keys are appended directly, so it
        is expected to end with a path separator. If the path does not exist,
        its ``os.path.dirname`` is created.
    plots_dict : dict
        Mapping of file name -> chart.

    Saving is best effort: a failure for one plot is printed and the
    remaining plots are still attempted.
    """
    if not os.path.exists(output_dir):
        parent = os.path.dirname(output_dir)
        if parent:  # dirname is "" for a bare relative prefix
            os.makedirs(parent, exist_ok=True)

    for name, plot in plots_dict.items():
        driver = None
        try:
            driver = webdriver.Chrome(ChromeDriverManager().install())
            save(plot, output_dir + name, method='selenium', webdriver=driver)
            print("Successfully saved {}".format(name))
        except Exception as e:
            print(e)
        finally:
            # Each plot starts its own browser; close it so Chrome processes
            # do not pile up.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(e)
def main(input_file, out_dir):
    """Read the CSV at ``input_file`` and save its two plots into ``out_dir``.

    Writes ``line_plot.png`` and ``ridgeline_plot.png`` with the selenium
    method, using the module-level ``driver``. ``out_dir`` is created first
    when it does not exist.
    """
    covid = pd.read_csv(input_file)
    line_plot = create_line_plot(covid)
    ridgeline_plot = create_ridgeline_plot(covid)

    # Create the directory up front instead of saving, catching any error
    # and saving again; unrelated save failures now surface directly.
    os.makedirs(out_dir, exist_ok=True)

    save(line_plot, f"{out_dir}/line_plot.png",
         method='selenium', webdriver=driver)
    save(ridgeline_plot, f"{out_dir}/ridgeline_plot.png",
         method='selenium', webdriver=driver)
def save_plots(output_dir, plots_dict):
    """Save every plot in ``plots_dict`` to ``<output_dir>/<key>`` with headless Chrome.

    Parameters
    ----------
    output_dir : str
        Directory the files are written into.
    plots_dict : dict
        Mapping of file name -> chart.

    Saving is best effort: a failure for one plot is printed and the
    remaining plots are still attempted.
    """
    chrome_flags = (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "start-maximized",
        "--disable-extensions",
    )
    for name, plot in plots_dict.items():
        driver = None
        try:
            chrome_options = Options()
            for flag in chrome_flags:
                chrome_options.add_argument(flag)
            driver = webdriver.Chrome(options=chrome_options)
            save(plot, output_dir + "/" + name, webdriver=driver)
            print("Successfully saved {}".format(name))
        except Exception as e:
            print(e)
        finally:
            # Each plot starts its own browser; close it so Chrome processes
            # do not pile up.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(e)
def save_plot(plot, out, plot_name):
    """save the plot object as ``<out>/eda_<plot_name>.png``

    Args:
        plot (alt.Chart): plot object to save
        out (string): output directory; created when missing
        plot_name (string): name of the plot to be included in the filename
    """
    os.makedirs(out, exist_ok=True)
    file_name = f'{out}/eda_{plot_name}.png'

    driver = webdriver.Chrome()
    try:
        save(plot, file_name, method='selenium', webdriver=driver)
    finally:
        # The browser is started for this one save; always shut it down.
        driver.quit()
def pdf_download(file, dia):
    """Save ``dia`` to ``file`` and render a Streamlit download button for it.

    The parent directory of ``file`` is created when missing and an existing
    file at that path is replaced. The saved bytes are handed to
    ``dow.download_button`` under the name ``Config.pdf_name``, and the
    resulting markup is emitted through ``st.markdown``.
    """
    save_dir = os.path.dirname(file)
    # Use the os module rather than shelling out: no quoting/injection issues
    # with unusual paths, and no dependency on `mkdir`/`rm` being available.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    if os.path.isfile(file):
        os.remove(file)

    save(dia, file)

    filename = Config.pdf_name
    with open(file, 'rb') as f:
        payload = f.read()
    download_button_str = dow.download_button(
        payload, filename, 'Click here to download PDF')
    st.markdown(download_button_str, unsafe_allow_html=True)
def compare_models(weather, folder):
    '''
    Trains each model type ten times and saves a box plot comparing their
    test accuracy (R^2), using fixed hyperparameters for each model.

    Input:
        weather - object providing train_linear_regressor,
                  train_regression_tree and train_neural_net
        folder  - path of the folder the graph is stored in.
                  Should not include a trailing slash.
    Output:
        Saves "model_comparison.html" in the folder specified
    '''
    scores = pd.DataFrame(columns=['Model', 'test_acc'])

    for _ in range(10):
        # Element [1] of each training result is recorded as the test accuracy.
        linear = weather.train_linear_regressor(model_get=False)
        scores.loc[len(scores)] = pd.Series({
            'Model': 'Linear Regression',
            'test_acc': linear[1]
        })

        tree = weather.train_regression_tree(max_features=35,
                                             max_depth=8,
                                             model_get=False)
        scores.loc[len(scores)] = pd.Series({
            'Model': 'Decision Tree Regression',
            'test_acc': tree[1]
        })

        network = weather.train_neural_net(max_iter=500,
                                           hidden_layer_sizes=(100, 100, 100),
                                           model_get=False)
        scores.loc[len(scores)] = pd.Series({
            'Model': 'Neural Network',
            'test_acc': network[1]
        })

    # Fit the y axis to the observed range, snapped to hundredths.
    lower = floor(scores['test_acc'].min() * 100) / 100.0
    upper = floor(scores['test_acc'].max() * 100) / 100.0 + 0.01

    accuracy_axis = alt.Y(
        'test_acc:Q',
        axis=alt.Axis(title='Accuracy (R^2)'),
        scale=alt.Scale(domain=[lower, upper]))
    test_chart = (
        alt.Chart(scores)
        .mark_boxplot(size=100)
        .encode(x='Model', y=accuracy_axis)
        .properties(height=400, width=800, title='Accuracy per Model')
    )
    save(test_chart, folder + "/" + "model_comparison.html")