def _build_all_charts(vis_data, sample_name=''):
    """Assemble every summary chart into one three-row dashboard.

    Args:
        vis_data: mapping with the pre-computed inputs of each chart. The keys
            read here are 'variant_type_counts', 'depth_histogram',
            'qual_histogram', 'gq_histogram', 'vaf_histograms_by_genotype',
            'base_changes', 'indel_sizes' and 'titv_counts'.
        sample_name: text used as the title of the combined chart.

    Returns:
        The vertically concatenated Altair chart with header and title
        configuration applied.
    """
    # Top row: variant types plus the depth / QUAL / GQ histograms.
    variant_types = _build_type_chart(vis_data['variant_type_counts'])
    depth = _build_depth_histogram(vis_data['depth_histogram'])
    qual = _build_qual_histogram(vis_data['qual_histogram'])
    genotype_quality = _build_gq_histogram(vis_data['gq_histogram'])
    top_row = alt.hconcat(variant_types, depth, qual, genotype_quality)
    top_row = top_row.resolve_scale(color='independent')

    # Middle row: the two VAF histogram panels.
    main_vaf, other_vaf = _build_vaf_histograms(
        vis_data['vaf_histograms_by_genotype'])
    middle_row = alt.hconcat(main_vaf, other_vaf)

    # Bottom row: note the Ti/Tv chart is displayed between the other two.
    base_changes = _build_base_change_chart(vis_data['base_changes'])
    indel_sizes = _build_indel_size_chart(vis_data['indel_sizes'])
    titv = _build_tt_chart(vis_data['titv_counts'])
    bottom_row = alt.hconcat(base_changes, titv, indel_sizes)
    bottom_row = bottom_row.resolve_scale(color='independent')

    dashboard = alt.vconcat(top_row, middle_row, bottom_row)
    dashboard = dashboard.properties(title=sample_name, spacing=70)
    dashboard = dashboard.configure_header(labelFontSize=16, titleFontSize=20)
    return dashboard.configure_title(fontSize=20)
def plot_line_by_key_altair(ds, key, title_fn=lambda x: '', cmap='viridis',
                            c_sort="descending", c_title=''):
    """Draw six line panels (a-f), one coloured line per level of ``key``.

    The panels plot QT, SLI, Q1, Q2, Q1NN and Q2NN on the x axis against ``z``
    on the y axis and are arranged as two rows of three.

    Args:
        ds: dataset passed to ``_to_plottable_dataframe`` and to ``title_fn``.
        key: column that drives the line colour.
        title_fn: callable mapping ``ds`` to the overall figure title.
        cmap: name of the colour scheme.
        c_sort: sort order of the colour encoding.
        c_title: legend title; ``key`` is used when this is empty.

    Returns:
        The vertically concatenated Altair chart.
    """
    legend_title = c_title or key
    frame = _to_plottable_dataframe(ds)

    height_axis = alt.Y('z', axis=alt.Axis(title='z (m)'))
    line_color = alt.Color(
        key,
        scale=alt.Scale(scheme=cmap),
        sort=c_sort,
        legend=alt.Legend(title=legend_title),
    )

    # (panel letter, column, x-axis unit, panel title)
    panel_specs = (
        ('a', 'QT', 'g/kg', 'Total Water'),
        ('b', 'SLI', 'K', 'Liquid-Ice Static Energy'),
        ('c', 'Q1', 'K/day', 'Average Q₁'),
        ('d', 'Q2', 'g/kg/day', 'Average Q₂'),
        ('e', 'Q1NN', 'K/day', 'Q₁ Prediction'),
        ('f', 'Q2NN', 'g/kg/day', 'Q₂ Prediction'),
    )

    panels = [
        alt.Chart(frame, width=150)
        .mark_line()
        .encode(alt.X(column, axis=alt.Axis(title=unit)),
                height_axis, line_color, order='z')
        .properties(title=f'{letter}) {label}')
        for letter, column, unit, label in panel_specs
    ]

    upper = alt.hconcat(*panels[:3])
    lower = alt.hconcat(*panels[3:])
    return alt.vconcat(upper, lower, title=title_fn(ds))
def summarize_vgfs(input_file, output_dir, groupby_column='scaffold',
                   max_auxiliary_score=3, remove_transposons=False,
                   remove_fs=False):
    """Summarise viral annotations into stats, an AMG table and a heatmap.

    Reads the tab-separated annotations, filters them to potential AMGs and
    writes three files into ``output_dir``: 'vMAG_stats.tsv',
    'amg_summary.tsv' and 'product.html'. Progress lines prefixed with the
    elapsed time are printed after each stage.

    Args:
        input_file: path of the tab-separated annotations file.
        output_dir: directory created with ``mkdir`` to receive the outputs.
        groupby_column: column used to group annotations into genomes.
        max_auxiliary_score: forwarded to ``filter_to_amgs`` as ``max_aux``.
        remove_transposons: forwarded to ``filter_to_amgs``.
        remove_fs: forwarded to ``filter_to_amgs``.

    Raises:
        ValueError: if no 'genome_summary_form' database location is set.
    """
    started = datetime.now()

    def report(template):
        # Fill the template's single %s with the time elapsed so far.
        print(template % str(datetime.now() - started))

    # Set up inputs and the output directory.
    annotations = pd.read_csv(input_file, sep='\t', index_col=0)
    db_locs = get_database_locs()
    if 'genome_summary_form' not in db_locs:
        raise ValueError(
            'Genome summary form location must be set in order to summarize genomes'
        )
    mkdir(output_dir)
    genome_summary_form = pd.read_csv(db_locs['genome_summary_form'],
                                      sep='\t', index_col=0)
    report('%s: Retrieved database locations and descriptions')

    # Potential AMGs.
    potential_amgs = filter_to_amgs(
        annotations.fillna(''),
        max_aux=max_auxiliary_score,
        remove_transposons=remove_transposons,
        remove_fs=remove_fs,
    )
    report('%s: Determined potential amgs')

    # Distillate: per-genome statistics and the AMG summary table.
    stats_table = make_viral_stats_table(annotations, potential_amgs,
                                         groupby_column)
    stats_table.to_csv(path.join(output_dir, 'vMAG_stats.tsv'), sep='\t')
    report('%s: Calculated viral genome statistics')

    distillate = make_viral_distillate(potential_amgs, genome_summary_form)
    distillate.to_csv(path.join(output_dir, 'amg_summary.tsv'), sep='\t',
                      index=None)
    report('%s: Generated AMG summary')

    # Liquor: AMG count column next to the functional heatmap.
    vgf_order = make_vgf_order(potential_amgs)
    amg_column = make_amg_count_column(potential_amgs, vgf_order)
    function_frame = make_viral_functional_df(potential_amgs,
                                              genome_summary_form,
                                              groupby_column=groupby_column)
    function_heatmap = make_viral_functional_heatmap(function_frame, vgf_order)
    product = alt.hconcat(amg_column, function_heatmap, spacing=5)
    product.save(path.join(output_dir, 'product.html'))
    report('%s: Generated product heatmap')

    report("%s: Completed distillation")
def outcome_summary(df, c_lim=0, d_lim=0, ao_lim=110, ao_base=True,
                    tcase=False, color='black', hspacing=20, vspacing=20,
                    **kwargs):
    """Build a 2x2 grid of cases, deaths, activity and output panels.

    Args:
        df: frame providing the 'c' (or 'ki'), 'd', 'act' and 'out' entries.
        c_lim: lower bound for the y maximum of the cases panel.
        d_lim: lower bound for the y maximum of the deaths panel.
        ao_lim: y maximum of the activity and output panels.
        ao_base: when true, overlay a dashed reference line at 99.4 on the
            activity and output panels.
        tcase: when true, cases are read from 'ki' instead of 'c'.
        color: colour of the reference lines, axes and titles.
        hspacing: horizontal spacing between panels.
        vspacing: vertical spacing between the two rows.
        **kwargs: forwarded to every ``path_dist`` call.

    Returns:
        The configured, vertically concatenated Altair chart.
    """
    case_column = 'ki' if tcase else 'c'
    cases = 1e6 * df[case_column].diff()
    deaths = 1e6 * df['d'].diff()
    activity = 1e2 * df['act']
    output = 1e2 * df['out']

    # The y range covers at least the limit and the largest 90th percentile.
    cases_top = np.maximum(c_lim, cases.quantile(0.9, axis=1).max())
    deaths_top = np.maximum(d_lim, deaths.quantile(0.9, axis=1).max())

    cases_chart = path_dist(cases, title='Daily cases per million',
                            y_max=cases_top, **kwargs)
    deaths_chart = path_dist(deaths, title='Daily deaths per million',
                             y_max=deaths_top, **kwargs)
    activity_chart = path_dist(activity, title='Economic activity (%)',
                               y_max=ao_lim, **kwargs)
    output_chart = path_dist(output, title='Economic output (%)',
                             y_max=ao_lim, **kwargs)

    if ao_base:
        reference = 99.4 * np.ones(len(df))

        def add_reference(chart, index):
            # Layer a dashed horizontal line on top of the given chart.
            ref_frame = pd.DataFrame({'base': reference},
                                     index=index).reset_index()
            ref_line = alt.Chart(ref_frame).mark_line(
                strokeDash=[5, 2], strokeWidth=1, color=color,
            ).encode(x='date', y='base')
            chart += ref_line
            return chart

        activity_chart = add_reference(activity_chart, activity.index)
        output_chart = add_reference(output_chart, output.index)

    upper = alt.hconcat(cases_chart, deaths_chart, spacing=hspacing)
    lower = alt.hconcat(activity_chart, output_chart, spacing=hspacing)
    grid = alt.vconcat(upper, lower, spacing=vspacing)

    grid = grid.configure_axisY(minExtent=40, labelFlush=True)
    grid = grid.configure_axis(domainColor=color, tickColor=color,
                               labelColor=color)
    return grid.configure_title(color=color)
def make_liquor_heatmap(module_coverage_frame, etc_coverage_df, function_df,
                        mag_order=None, labels=None):
    """Join the module, ETC and functional heatmaps side by side.

    The module and ETC heatmaps are built with the ``mag_order`` that was
    passed in. When ``labels`` is given, ``function_df`` and ``mag_order`` are
    first passed through ``rename_genomes_to_taxa`` and only the functional
    heatmap uses the renamed values.

    Returns:
        A horizontal concatenation of (module + ETC) and the functional
        heatmap.
    """
    coverage_panel = make_module_coverage_heatmap(module_coverage_frame,
                                                  mag_order)
    etc_panel = make_etc_coverage_heatmap(etc_coverage_df,
                                          mag_order=mag_order)

    function_order = mag_order
    if labels is not None:
        function_df, function_order = rename_genomes_to_taxa(
            function_df, labels, mag_order)
    function_panel = make_functional_heatmap(function_df, function_order)

    left_side = alt.hconcat(coverage_panel, etc_panel)
    return alt.hconcat(left_side, function_panel)
def histograms(self):
    """
    Produces a histogram for every tag that has been optimised, laid out horizontally
    to each other. "Health" and "Magic" always come first, followed by the tags of the
    first chromosome in ``ChromosomeController.nondominatedFront``.
    :return: the produced graph, titled "Histograms"
    """
    all_graphs = self.histogram("Health")
    all_graphs = alt.hconcat(all_graphs, self.histogram("Magic"))
    for (tag, _, _) in ChromosomeController.nondominatedFront[0].tags:
        all_graphs = alt.hconcat(all_graphs, self.histogram(tag))
    # properties() returns a modified copy rather than mutating in place, so the
    # result has to be kept or the title is silently lost.
    all_graphs = all_graphs.properties(
        title="Histograms"
    )
    return all_graphs
def explore_dataset(users, movies, ratings):
    """Print user summaries and show linked rating histograms.

    Two interactive figures are sent to ``altair_viewer``: ratings per user
    filtered by occupation, and ratings per movie filtered by genre.

    Args:
        users: user frame; read for 'user_id' and 'occupation'.
        movies: movie frame, passed to ``get_movie_ratings``.
        ratings: ratings frame; read for 'user_id' and 'rating'.
    """
    # Exploring the MovieLens Data (Users)
    print(users.describe())  # User features
    # The builtin `object` selects the same columns as the alias `np.object`,
    # which was removed in NumPy 1.24.
    print(users.describe(include=[object]))  # Categorical user features

    # Create filters to slice the data
    occupation_filter = alt.selection_multi(fields=["occupation"])
    occupation_chart = alt.Chart().mark_bar().encode(
        x="count()",
        y=alt.Y("occupation:N"),
        color=alt.condition(
            occupation_filter,
            alt.Color("occupation:N", scale=alt.Scale(scheme='category20')),
            alt.value("lightgray")),
    ).properties(width=300, height=300, selection=occupation_filter)

    # Per-user rating count and mean, joined with the user features.
    # flatten_cols() is a project helper attached to DataFrame elsewhere.
    users_ratings = (
        ratings.groupby('user_id', as_index=False)
        .agg({'rating': ['count', 'mean']})
        .flatten_cols()
        .merge(users, on='user_id'))

    # Create a chart for the count, and one for the mean.
    altair_viewer.show(
        alt.hconcat(
            filtered_hist('rating count', '# ratings / user',
                          occupation_filter),
            filtered_hist('rating mean', 'mean user rating',
                          occupation_filter),
            occupation_chart,
            data=users_ratings))

    # Exploring the MovieLens Data (Movies)
    movies_ratings = get_movie_ratings(movies, ratings)
    genre_filter, genre_chart = filter_and_chart()

    # Display the number of ratings and average rating per movie
    altair_viewer.show(
        alt.hconcat(
            filtered_hist('rating count', '# ratings / movie', genre_filter),
            filtered_hist('rating mean', 'mean movie rating', genre_filter),
            genre_chart,
            data=movies_ratings))
def plot_results_means():
    """Show mean accuracy/kappa curves for the sine-stream experiments.

    Twelve CSV logs under './logs/sine/' are read, in three groups of four
    (incremental, reoccurring and sudden drift). Within each group, result i
    is shown next to result i + 2, i.e. each 'EH' run beside its plain
    counterpart, and every pair is displayed with ``.show()``.
    """
    csvNames = [
        [
            'NB_EHmeansIncrementalMixedResults',
            'NB_EHmeansIncrementalSeparatedResults',
            'NB_meansIncrementalMixedResults',
            'NB_meansIncrementalSeparatedResults',
        ],
        [
            'NB_EHmeansReocurringMixedResults',
            'NB_EHmeansReocurringSeparatedResults',
            'NB_meansReocurringMixedResults',
            'NB_meansReocurringSeparatedResults',
        ],
        [
            'NB_EHmeansSuddenDriftMixedResults',
            'NB_EHmeansSuddenDriftSeparatedResults',
            'NB_meansSuddenDriftMixedResults',
            'NB_meansSuddenDriftSeparatedResults',
        ],
    ]

    def load_long(name):
        # Read one log, keep the id and the two mean columns, then melt so
        # each metric becomes a row.
        frame = pd.read_csv('./logs/sine/' + name + '.csv', skiprows=5)
        frame = frame.drop(['current_acc_[M0]', 'current_kappa_[M0]'], axis=1)
        frame.columns = ['id', 'mean_acc', 'mean_kappa']
        return frame.melt('id', var_name='metrics')

    def metric_lines(frame, title):
        return alt.Chart(frame, title=title).mark_line(opacity=0.7).encode(
            x='id',
            y='value',
            color='metrics',
        )

    # Load everything up front so a missing file fails before any plot opens.
    grouped_frames = [[load_long(name) for name in names]
                      for names in csvNames]

    for names, frames in zip(csvNames, grouped_frames):
        pairs = zip(frames, names, frames[2:], names[2:])
        for left_frame, left_name, right_frame, right_name in pairs:
            alt.hconcat(
                metric_lines(left_frame, left_name),
                metric_lines(right_frame, right_name),
            ).show()
def plot_line_by_key_altair(ds, key, title_fn=lambda x: "", cmap="viridis",
                            c_sort="descending", c_title=""):
    """Make six line panels (a-f), one coloured line per level of ``key``.

    The panels plot QV, Q1, Q2, TABS, Q1NN and Q2NN against the encoding
    returned by ``get_pressure_encoding()``. They are laid out as three
    columns of two: (a, d), (b, e) and (c, f). The second and third columns
    share their x scale.

    Args:
        ds: dataset passed to ``_to_plottable_dataframe`` and to ``title_fn``.
        key: column that drives the line colour.
        title_fn: callable mapping ``ds`` to the overall figure title.
        cmap: name of the colour scheme.
        c_sort: sort order of the colour encoding.
        c_title: legend title; ``key`` is used when this is empty.

    Returns:
        The horizontally concatenated Altair chart.
    """
    if not c_title:
        c_title = key

    df = _to_plottable_dataframe(ds)

    z = get_pressure_encoding()
    color = alt.Color(key, scale=alt.Scale(scheme=cmap), sort=c_sort,
                      legend=alt.Legend(title=c_title))

    # (panel letter, column, x-axis unit, panel title)
    labels = [
        ("a", "QV", "g/kg", "Water Vapor"),
        ("b", "Q1", "K/day", "Average Q₁"),
        ("c", "Q2", "g/kg/day", "Average Q₂"),
        ("d", "TABS", "K", "Temperature"),
        ("e", "Q1NN", "K/day", "Q₁ Prediction"),
        ("f", "Q2NN", "g/kg/day", "Q₂ Prediction"),
    ]

    # The loop variable is named `column` so it does not shadow the `key`
    # parameter.
    charts = [
        alt.Chart(df, width=150).mark_line().encode(
            alt.X(column, axis=alt.Axis(title=unit)), z, color,
            order="z").properties(title=f"{letter}) {label}")
        for letter, column, unit, label in labels
    ]

    cols = [
        alt.vconcat(charts[0], charts[3]),
        alt.vconcat(charts[1], charts[4]).resolve_scale(x='shared'),
        alt.vconcat(charts[2], charts[5]).resolve_scale(x='shared'),
    ]

    return alt.hconcat(*cols, title=title_fn(ds))
def umap(z: np.ndarray, d: np.ndarray, lbls: np.ndarray, n_neighbors: int = 8):
    """Embed ``z`` with UMAP and show it coloured two ways, side by side.

    The left panel colours points by ``lbls`` (no legend). The right panel
    colours them by ``log1p`` of the row sums of ``d``, with the colour domain
    clamped to the 2.5th-97.5th percentile range.

    Args:
        z: input passed to ``UMAP(...).fit_transform`` (cosine metric).
        d: array whose values are summed along axis 1.
        lbls: per-point labels for the left panel.
        n_neighbors: UMAP neighbourhood size.

    Returns:
        The horizontally concatenated Altair chart.
    """
    embedding = UMAP(n_neighbors=n_neighbors, metric="cosine").fit_transform(z)
    log_depth = np.log1p(d.sum(1))
    low, high = np.percentile(log_depth, (2.5, 97.5))

    points = pd.DataFrame({
        "x": embedding[:, 0],
        "y": embedding[:, 1],
        "c": lbls,
        "log_d": log_depth,
    })
    base = alt.Chart(points).properties(height=300, width=300)

    by_label = base.mark_point(opacity=0.3).encode(
        x="x:Q",
        y="y:Q",
        color=alt.Color("c:N", legend=None),
    )

    depth_scale = alt.Scale(scheme="viridis", clamp=True, nice=True,
                            domain=(low, high))
    by_depth = base.mark_point(opacity=0.8).encode(
        x="x:Q",
        y="y:Q",
        color=alt.Color("log_d:Q", scale=depth_scale),
    )

    return alt.hconcat(by_label, by_depth)
def barplot_all(self, var_l, y_scale=None, y_max_check=False,
                default_col="test"):
    """Return the dict spec of per-environment bar charts of ``results_flat``.

    Args:
        var_l: list of columns to plot; when empty, every column of
            ``self.results_flat`` whose name contains ``default_col`` is used.
        y_scale: optional (min, max) domain for the y axis.
        y_max_check: when true (and ``y_scale`` is given) the upper bound is
            replaced by the largest plotted value plus 0.1, rounded to two
            decimals.
        default_col: substring used to pick columns when ``var_l`` is empty.

    Returns:
        The chart converted with ``to_dict()``.
    """
    results = self.results_flat
    if not var_l:
        # Nothing requested: plot every column matching the default pattern.
        var_l = [name for name in results.columns if default_col in name]

    long_form = (results[var_l + ["env"]]
                 .fillna("NaN")
                 .reset_index()
                 .melt(["env", "index"]))

    if not y_scale:
        y_bar = "value:Q"
    else:
        domain = y_scale
        if y_max_check:
            upper = round(results[var_l].max().max() + 0.1, 2)
            domain = (y_scale[0], upper)
        y_bar = alt.Y("value:Q", scale=alt.Scale(domain=domain))

    bars = alt.Chart(long_form).mark_bar().encode(
        y=y_bar, x="variable:N", color="env:N",
    ).properties(width=400)

    # One filtered copy of the bar chart for each env entry other than "N/A".
    panels = alt.hconcat().properties(background="white")
    for env in results.env:
        if env == "N/A":
            continue
        panels |= bars.transform_filter(alt.expr.datum.env == env)

    return panels.to_dict()
def show_covid_feature_relationship(group_dict, sub_feature_list):
    """Render a grid of bar charts relating every pair of selected features.

    One row is drawn per feature and one column per other feature. Only the
    first column carries a y-axis title (the row's feature), and only the last
    row carries x-axis titles ("Color: <column feature>").

    Args:
        group_dict: mapping from feature name to the frame plotted in its row.
        sub_feature_list: ordered feature names defining rows and columns.
    """
    intro = "You are showing the relationship between **{}**, for each graph, the whole data points are separated into two bars according to the feature along the y-axis, and each bar is separated into two colors by the feature along the x-axis"
    st.write(intro.format(', '.join(sub_feature_list)))

    first_feature = sub_feature_list[0]
    last_feature = sub_feature_list[-1]

    grid = alt.vconcat()
    for feature in sub_feature_list:
        frame = group_dict[feature]
        is_bottom_row = feature == last_feature
        cells = alt.hconcat()
        for other_feature in sub_feature_list:
            # Title shown on the y axis (first column only).
            left_title = feature if other_feature == first_feature else None
            # Title shown on the x axis (bottom row only).
            bottom_title = ("Color: " + other_feature
                            if is_bottom_row else None)
            cell = alt.Chart(frame).mark_bar().encode(
                alt.X(feature + ' type:N', title=bottom_title),
                alt.Y(other_feature + ':Q', title=left_title),
                alt.Color(other_feature + ' type:N'),
                alt.Tooltip([other_feature + ':Q',
                             other_feature + ' type:N']),
            ).properties(width=150, height=150)
            cells |= cell
        grid &= cells

    st.altair_chart(grid)
def simple_map(data, clabel=None, projection='albersUsa', clim=None,
               cmap='reds'):
    """Plot a gridded field as a single-panel ``carto`` map.

    Args:
        data: labelled array exposing 'lat' and 'lon' entries and ``.values``.
            The marker size is derived from the first two dimensions of
            ``data['lat']``.
        clabel: colour-bar label forwarded to ``carto``.
        projection: map projection name forwarded to ``carto``.
        clim: colour limits forwarded to ``carto``. When given, only points
            whose value is strictly greater than ``clim[0]`` are drawn; when
            None, every point is drawn.
        cmap: colour scheme name forwarded to ``carto``.

    Returns:
        An Altair horizontal concatenation holding the single map.
    """
    lat = data['lat'].values.flatten()
    lon = data['lon'].values.flatten()
    color = data.values.flatten()

    # clim defaults to None; indexing it unconditionally raised TypeError.
    # A full slice keeps every point when no lower limit is supplied.
    inds = slice(None) if clim is None else color > clim[0]

    shape = data['lat'].shape
    size = (300 / shape[0]) * (500 / shape[1]) * 0.9

    row = alt.hconcat()
    row |= carto(
        lat=lat[inds],
        lon=lon[inds],
        color=color[inds],
        clim=clim,
        cmap=cmap,
        clabel=clabel,
        size=size,
        width=500,
        height=300,
        projection=projection,
    )
    return row
def createChart(data):
    """Build a bar chart of the mean restaurant count per cuisine.

    Bars are light grey, sorted by descending mean of 'restaurants', and carry
    two single selections: "highlight" (on mouseover) and "rating" (on the
    y encoding).

    Args:
        data: data set with 'restaurants' and 'cuisine' fields.

    Returns:
        A horizontal concatenation containing the bar chart, bound to
        ``data``.
    """
    highlight_selection = alt.selection_single(name="highlight", empty="all",
                                               on="mouseover")
    rating_selection = alt.selection_single(name="rating", empty="all",
                                            encodings=['y'])

    barMean = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
            alt.X("mean(restaurants):Q",
                  axis=alt.Axis(title="Restaurants")),
            alt.Y('cuisine:O', axis=alt.Axis(title="Cuisine"),
                  sort=alt.SortField(field="restaurants", op="mean",
                                     order='descending')),
            alt.ColorValue("LightGrey"),
        ).properties(
            width=200,
            height=350,
            selection=highlight_selection + rating_selection,
        )

    return alt.hconcat(barMean, data=data)
def _mean_substance_amount(frame):
    """Average 'substance_amount' per (age group, month-year of death)."""
    grouped = frame.groupby(['age_group', 'year_month_of_death'])
    return grouped['substance_amount'].mean().reset_index(
        name=color_coding_label)


def _draw_heatmap_by_substance_name(df, substance_name: str) -> alt.HConcatChart:
    """Draw the overall, male and female heat maps for one substance.

    Args:
        df: frame with 'substance_name', 'sex', 'age_group',
            'year_month_of_death' and 'substance_amount' columns.
        substance_name: substance to filter on; the special value
            'ALL_SUBSTANCES' keeps every row.

    Returns:
        One horizontally concatenated chart holding the three heat maps (all,
        "Male", "Female") with a shared y scale.
    """
    # NOTE(review): substance_name is interpolated into the query string, so a
    # name containing a double quote would break the expression.
    if substance_name != 'ALL_SUBSTANCES':
        selected = df.query(f'substance_name == "{substance_name}"')
    else:
        selected = df

    # Sharing the y axis with other columns in visualization
    heat_maps = alt.hconcat().resolve_scale(y='shared')

    # The overall panel is created without a label argument, exactly as
    # before.
    heat_maps |= _get_heatmap(_mean_substance_amount(selected),
                              substance_name)

    for sex_code, sex_label in (('M', 'Male'), ('F', 'Female')):
        by_sex = selected.query(f'sex == "{sex_code}"')
        heat_maps |= _get_heatmap(_mean_substance_amount(by_sex),
                                  substance_name, sex_label)

    return heat_maps
def make_chart_organisational_diversity(
    org_coeffs,
    num_orgs,
    metric_params,
    org_type_lookup,
    paper_counts,
    save=True,
    fig_num=14,
):
    """Plot comparing the organisational diversity coefficients.

    The left panel layers the ``num_orgs`` smallest 'beta' coefficients (bars
    coloured by organisation type) with an error bar from 'lower' to 'upper'.
    The right panel shows paper counts for the same organisations in the same
    order.

    Args:
        org_coeffs: mapping from metric to a coefficient frame with 'beta',
            'lower' and 'upper' columns, indexed by organisation.
        num_orgs: number of organisations to keep.
        metric_params: key selecting the frame inside ``org_coeffs``.
        org_type_lookup: mapping from organisation to organisation type.
        paper_counts: paper counts indexed by organisation.
        save: when exactly ``True``, the figure is saved with ``save_altair``.
        fig_num: figure number used in the saved file name.

    Returns:
        The horizontally concatenated Altair chart.
    """
    # Regression coefficients sorted
    selected = (org_coeffs[metric_params].sort_values("beta").head(
        n=num_orgs).reset_index(drop=False))

    selected["org_type"] = selected["index"].map(org_type_lookup)
    selected["order"] = range(0, len(selected))

    # Paper counts by organisation
    recent_papers_orgs = (paper_counts.loc[selected["index"]].reset_index(
        name="papers").rename(columns={"index": "org"}))
    recent_papers_orgs["order"] = range(0, len(recent_papers_orgs))
    recent_papers_orgs["org_type"] = recent_papers_orgs["org"].map(
        org_type_lookup)

    b_ch = (alt.Chart(selected).mark_bar().encode(
        y=alt.Y("index", sort=alt.EncodingSortField("order"), title=""),
        x=alt.X("beta", title="Coefficient on diversity"),
        # The colour channel needs alt.Color; alt.X is a positional channel.
        color=alt.Color("org_type", title="Organisation type"),
    )).properties(width=150, height=600)

    b_err = (alt.Chart(selected).mark_errorbar().encode(
        y=alt.Y(
            "index",
            sort=alt.EncodingSortField("order"),
            title="",
            axis=alt.Axis(ticks=False, labels=False),
        ),
        x=alt.X("lower", title=""),
        x2="upper",
    )).properties(width=150, height=600)

    b_act = (alt.Chart(recent_papers_orgs).mark_bar().encode(
        y=alt.Y(
            "org",
            title=None,
            sort=alt.EncodingSortField("order"),
            axis=alt.Axis(labels=False, ticks=False),
        ),
        x=alt.X("papers"),
        color="org_type",
    )).properties(width=100, height=600)

    out = (b_ch + b_err).resolve_scale(y="independent")
    out_2 = alt.hconcat(out, b_act, spacing=0).resolve_scale(y="shared")

    if save is True:
        save_altair(out_2, f"fig_{fig_num}_comp", driv)

    return out_2
def _plots(self):
    """Build the two non-time plots side by side and report how long it took.

    Returns:
        A two-element list containing the same configured chart twice.
    """
    started = time.time()

    with st.spinner('Generating Plots'):
        left = self.plotNonTime('T_diff', 'T_diff_eff')
        left = left.properties(width=self.def_width)
        right = self.plotNonTime('solar_w', 'geo_tot_w')
        right = right.properties(width=self.def_width)

        plot = alt.hconcat(left, right)
        plot = plot.configure_axis(
            labelFontSize=self.label_font_size,
            titleFontSize=self.title_font_size,
            titlePadding=41,
            domain=False,
        )
        plot = plot.configure_legend(
            labelFontSize=self.label_font_size,
            titleFontSize=self.title_font_size,
        )
        plot = plot.configure_view(cornerRadius=2)

    timing_row = [F"{'Altair plot gen:': <20}", F"{time.time() - started:.2f} s"]
    message(timing_row, tbl=self.mssg_tbl, mssgType='TIMING')

    return [plot, plot]
def createChart(data, name=''):
    """Build a bar chart of 'rating' per cuisine name.

    Bars are light grey, turn steel blue for the mark under the mouse, and are
    sorted by descending mean of 'rating'.

    Args:
        data: data set with 'rating' and 'name' fields.
        name: text inserted into the chart title.

    Returns:
        A horizontal concatenation containing the bar chart, bound to ``data``
        and titled with ``name``.
    """
    # True for the mark whose internal id matches the "highlight" selection.
    color_condition = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    highlight_selection = alt.selection_single(name="highlight", empty="all",
                                               on="mouseover")

    barMean = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
            alt.X("rating:Q",
                  axis=alt.Axis(title="The number of restaurants")),
            # The axis title has no placeholder, so the former
            # .format(name) call on it had no effect and was dropped.
            alt.Y('name:O', axis=alt.Axis(title="Cuisines"),
                  sort=alt.SortField(field="rating", op="mean",
                                     order='descending')),
            alt.ColorValue("LightGrey", condition=color_condition),
        ).properties(
            selection=highlight_selection,
        )

    return alt.hconcat(
        barMean,
        data=data,
        title="The number of restaurants ({} in NYC) - Top 25 cuisines".format(
            name))
def tv_linkedScatterPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Draw two scatter plots that share one interval selection.

    The columns 'plotY', 'plotX1' and 'plotX2' of a renamed copy of ``data``
    become ``xlabel``, ``ylabel1`` and ``ylabel2``. Both panels put ``xlabel``
    on the y axis; points outside the brushed interval turn light grey.

    Args:
        data: frame with 'plotY', 'plotX1', 'plotX2' and 'anfreq_label'
            columns; it is not modified.
        engine: not used by this function.
        xlabel: display name for 'plotY' (shared y axis).
        ylabel1: display name for 'plotX1' (x axis of the left panel).
        ylabel2: display name for 'plotX2' (x axis of the right panel).

    Returns:
        The horizontally concatenated Altair chart.
    """
    renamed = data.rename(columns={
        'plotY': xlabel,
        'plotX1': ylabel1,
        'plotX2': ylabel2,
    })

    brush = alt.selection(type='interval', encodings=['x', 'y'])
    points = alt.Chart(renamed).mark_point()
    shared_y = f'{xlabel}:Q'

    def brushed_color():
        # Coloured by 'anfreq_label' inside the brush, light grey outside.
        return alt.condition(brush, 'anfreq_label', alt.value('lightgray'))

    left = points.encode(
        x=ylabel1,
        y=alt.Y(shared_y, axis=alt.Axis(format='~s')),
        color=brushed_color(),
    ).properties(selection=brush, width=260, height=300)

    right = points.encode(
        x=ylabel2,
        y=alt.Y(shared_y, title='', axis=alt.Axis(labels=False)),
        color=brushed_color(),
    ).properties(selection=brush, width=260, height=300)

    return alt.hconcat(left, right, spacing=0)
def get_chart(keyword):
    """Query the top TF-IDF words table and return the dashboard as JSON.

    Args:
        keyword: "*" selects every article; any other value keeps only the
            articles whose 'clean_text' matches ``LIKE '%keyword%'``.

    Returns:
        The JSON spec of a chart made of a timeline of row counts on top and,
        below it, articles per site next to the summed frequency of the
        top-ranked words.
    """
    # "postgresql" is the dialect name SQLAlchemy accepts in every version;
    # the old "postgres" alias is rejected from 1.4 onwards.
    db_string = "postgresql://*****:*****@postgres:5432/shared"

    query = ("select article_id,string_date,site,palabra,n_w "
             "from tb_news_covid_mexico_palabras_top_tfidf")
    params = None
    if keyword != "*":
        # The keyword is bound as a parameter, never spliced into the SQL
        # text, so it cannot alter the statement.
        query += (" where article_id in (select article_id from "
                  "tb_news_covid_mexico_date_text "
                  "where clean_text LIKE :pattern )")
        params = {"pattern": "%" + keyword + "%"}

    db = create_engine(db_string)
    df = pd.read_sql_query(sqlalchemy.text(query), db, params=params)

    chart3 = alt.Chart(df).mark_point().encode(
        y='count()',
        x='string_date:T').properties(width=900).interactive()

    chart1 = alt.Chart(df).mark_bar().encode(
        x=alt.X('count(article_id):Q'),
        y=alt.Y("site:N",
                sort=alt.EncodingSortField(
                    field="site", op="count", order="descending")),
    ).transform_aggregate(
        groupby=["article_id", "site"],
    ).properties(height=800)

    chart2 = alt.Chart(df).mark_bar().encode(
        x=alt.X('freq_palabras:Q', aggregate="sum"),
        y=alt.Y("palabra",
                sort=alt.EncodingSortField(
                    field="freq_palabras", op="sum", order="descending")),
    ).transform_aggregate(
        freq_palabras='sum(n_w)',
        groupby=["palabra"],
    ).transform_window(
        rank='row_number()',
        sort=[alt.SortField("freq_palabras", order="descending")],
    ).transform_filter(
        (alt.datum.rank < 25)
    ).properties(height=800)

    return alt.vconcat(chart3, alt.hconcat(chart1, chart2)).to_json()
def plot_class_report(learn):
    """Plot one annotated bar chart per entry of ``class_report(learn)``.

    Every entry of the report becomes a panel whose bars are its metrics
    (y axis fixed to 0-1, values printed above the bars). The 'support' value
    is not drawn; it is appended to the panel title instead. The 'notebook'
    renderer is enabled as a side effect.

    Returns:
        The panels concatenated horizontally, nested pairwise left to right.
    """
    alt.renderers.enable('notebook')
    report = class_report(learn)

    panels = []
    for label in report.keys():
        metrics = report[label]
        heading = label + ', Support: '
        names, scores = [], []
        for metric in metrics:
            if metric == 'support':
                heading += str(metrics[metric])
            else:
                names.append(metric)
                scores.append(metrics[metric])

        frame = pd.DataFrame({'x': names, 'y': scores})

        bars = alt.Chart(frame, width=200).mark_bar(size=30).encode(
            x=alt.X("x", axis=alt.Axis(labelAngle=0, title='')),
            y=alt.Y('y', axis=alt.Axis(title=''),
                    scale=alt.Scale(domain=(0, 1))),
        )
        values = alt.Chart(frame).mark_text(baseline='bottom', dy=-1).encode(
            x='x',
            y='y',
            text=alt.Text('y', format='.2f'),
        )
        panels.append((bars + values).properties(title=heading))

    return reduce(alt.hconcat, panels)
def save_plot_movies(name):
    """Save the per-movie rating histograms, filterable by genre.

    The module-level ``movies`` and ``ratings`` frames are joined into
    per-movie rating counts and means. The figure shows both histograms next
    to a clickable genre bar chart.

    Args:
        name: output path handed to ``Chart.save``.
    """
    # flatten_cols() is a project helper attached to DataFrame elsewhere.
    per_movie = (
        ratings.groupby('movie_id', as_index=False)
        .agg({'rating': ['count', 'mean']})
        .flatten_cols())
    movies_ratings = movies.merge(per_movie, on='movie_id')

    genre_filter = alt.selection_multi(fields=['genre'])
    genre_chart = alt.Chart().mark_bar().encode(
        x="count()",
        y=alt.Y('genre'),
        color=alt.condition(genre_filter, alt.Color("genre:N"),
                            alt.value('lightgray')),
    ).properties(height=300, selection=genre_filter)

    # Two "top 10" expressions used to sit here; their results were never
    # stored or shown, so they were removed.

    plot = alt.hconcat(
        filtered_hist('rating count', '# ratings / movie', genre_filter),
        filtered_hist('rating mean', 'mean movie rating', genre_filter),
        genre_chart,
        data=movies_ratings)

    plot.save(name)
def movie_embedding_norm(models):
    """Visualizes the norm and number of ratings of the movie embeddings.

    One scatter panel is drawn per model (number of ratings against embedding
    norm). All panels share an interval brush, and each shows the title of the
    movie nearest to the mouse.

    Args:
      models: a model, or a list of models, exposing
        ``embeddings["movie_id"]``.

    Returns:
      Whatever ``altair_viewer.show`` returns for the concatenated chart.
    """
    model_list = models if isinstance(models, list) else [models]

    df = pd.DataFrame({
        'title': movies['title'],
        'genre': movies['genre'],
        'num_ratings': movies_ratings['rating count'],
    })

    brush = alt.selection_interval()
    brushed_color = alt.condition(brush, alt.value('#4c78a8'),
                                  alt.value('lightgray'))

    panels = []
    for position, model in enumerate(model_list):
        norm_column = f'norm{position}'
        df[norm_column] = np.linalg.norm(model.embeddings["movie_id"], axis=1)

        hover = alt.selection(
            type='single', encodings=['x', 'y'], on='mouseover',
            nearest=True, empty='none')

        dots = alt.Chart().mark_circle().encode(
            x='num_ratings',
            y=norm_column,
            color=brushed_color,
        )
        dots = dots.properties(selection=hover).add_selection(brush)

        # Only the hovered point gets its title drawn.
        titles = alt.Chart().mark_text(align='center', dx=5, dy=-5).encode(
            x='num_ratings',
            y=norm_column,
            text=alt.condition(hover, 'title', alt.value('')),
        )
        panels.append(alt.layer(dots, titles))

    return altair_viewer.show(alt.hconcat(*panels, data=df))
def make_figure(x_axis, y_axis):
    """Return HTML for a brushable scatter plot linked to a histogram.

    Args:
        x_axis: encoding for the scatter plot's x channel.
        y_axis: encoding for the scatter plot's y channel.

    Returns:
        The chart rendered to an HTML string. The scatter plot of the
        module-level ``cars`` data carries an interval brush that filters the
        horsepower histogram beside it.
    """
    brush = alt.selection_interval()
    cars_chart = alt.Chart(cars)

    # Left: scatter plot of x vs y, owning the brush.
    scatter = cars_chart.mark_point().encode(
        x=x_axis, y=y_axis, color="Origin:N",
    ).properties(width=250, height=400, selection=brush)

    # Right: horsepower histogram restricted to the brushed points.
    histogram = cars_chart.mark_bar().encode(
        x=alt.X("Horsepower:Q", bin=True),
        y="count()",
        color="Origin:N",
    ).transform_filter(brush.ref()).properties(height=375)

    # Render into memory rather than onto disk.
    buffer = io.StringIO()
    alt.hconcat(scatter, histogram).save(buffer, "html")
    return buffer.getvalue()
def world_map_for_factors(highlight, dataset, select_year):
    """Stack one world map per factor, coloured by that factor's value.

    Args:
        highlight: selection deciding which countries are coloured; the rest
            are drawn light grey.
        dataset: list of factor column names; one map is drawn per entry and
            all of them are listed in the tooltip.
        select_year: selection used to filter the rows of every map.

    Returns:
        The vertically concatenated maps with independent colour scales.
    """
    # The maps are stacked with `&`, so the accumulator starts as an empty
    # vertical concat. Starting from alt.hconcat() left an empty horizontal
    # concat as the first child of the stack.
    stacked = alt.vconcat()
    for val in dataset:
        factor_map = alt.Chart(df).mark_geoshape(
            stroke='#aaa', strokeWidth=0.25
        ).encode(
            x=alt.X("Country Name"),
            color=alt.condition(highlight, val, alt.value('lightgrey'),
                                scale=alt.Scale(scheme='yelloworangered'),
                                title=""),
            tooltip=["Country Name"] + dataset
        ).transform_lookup(
            # Map country names to ids, then ids to their geometry.
            lookup='Country Name',
            from_=alt.LookupData(
                "https://raw.githubusercontent.com/KoGor/Map-Icons-Generator/master/data/world-110m-country-names.tsv",
                'name', ['id', "name"])
        ).transform_lookup(
            lookup='id',
            from_=alt.LookupData(
                countries, 'id',
                fields=["id", "type", "properties", "geometry"])
        ).project(
            type="equirectangular"
        ).properties(
            width=500,
            height=200,
            title=val,
        ).add_selection(select_year, highlight) \
            .transform_filter(select_year)

        stacked &= factor_map

    return stacked.resolve_scale(color='independent')
def make_etc_coverage_heatmap(etc_coverage, mag_order=None, module_order=None):
    """Build one coverage heatmap per ETC complex and join them in a row.

    Args:
        etc_coverage: frame with 'genome', 'complex', 'module_name',
            'percent_coverage' and the tooltip columns.
        mag_order: sort order for the genome (y) axis.
        module_order: sort order for the module (x) axis.

    Returns:
        The horizontally concatenated heatmaps titled 'ETC Complexes'.
    """
    genome_count = len(set(etc_coverage['genome']))

    hover_fields = [
        ('genome', 'Genome'),
        ('module_name', 'Module Name'),
        ('path_length', 'Module Subunits'),
        ('path_length_coverage', 'Subunits present'),
        ('genes', 'Genes present'),
        ('missing_genes', 'Genes missing'),
    ]

    def complex_panel(complex_name, frame):
        # Cell size is fixed, so the panel grows with its module count.
        modules_in_panel = len(set(frame['module_name']))
        panel = alt.Chart(frame, title=complex_name).mark_rect().encode(
            x=alt.X('module_name', title=None,
                    axis=alt.Axis(labelLimit=0, labelAngle=90),
                    sort=module_order),
            y=alt.Y('genome',
                    axis=alt.Axis(title=None, labels=False, ticks=False),
                    sort=mag_order),
            tooltip=[alt.Tooltip(field, title=caption)
                     for field, caption in hover_fields],
            color=alt.Color('percent_coverage',
                            legend=alt.Legend(title='% Complete'),
                            scale=alt.Scale(domain=(0, 1))),
        )
        return panel.properties(
            width=HEATMAP_CELL_WIDTH * modules_in_panel,
            height=HEATMAP_CELL_HEIGHT * genome_count)

    panels = [complex_panel(complex_name, frame)
              for complex_name, frame in etc_coverage.groupby('complex')]

    row_title = alt.TitleParams('ETC Complexes', anchor='middle')
    return alt.hconcat(*panels, spacing=5, title=row_title)
def create_ridgeline_plot(data):
    """A function that creates a ridgeline plot for covid_19 CAN & USA dataset.

    Both countries are filtered with the same conditions: positive
    ``new_tests`` and a date inside the closed range
    [START_DATE, END_DATE].

    Parameters
    ----------
    data
        input data set from preprocessed csv; read for the 'iso_code',
        'new_tests' and 'date' columns.

    Returns
    -------
    altair object
        returns the plot as a altair object
    """
    # One shared filter keeps the two panels on the same date window. The USA
    # query previously used a strict `date > @START_DATE` while Canada used
    # `>=`, so the USA panel lost the first day.
    row_filter = "new_tests > 0 and date >= @START_DATE and date <= @END_DATE"

    usa = data.query(
        f"iso_code == @ISO_CODES['USA'] and {row_filter}"
    ).reset_index(drop=True)

    can = data.query(
        f"iso_code == @ISO_CODES['CANADA'] and {row_filter}"
    ).reset_index(drop=True)

    can_usa_plt = alt.hconcat(
        generate_ridgeline_plot(can, "Canada"),
        generate_ridgeline_plot(usa, "USA"),
        title="COVID-19 Response Ratio - Canada vs USA",
    ).configure_facet(spacing=0).configure_view(stroke=None).configure_title(
        anchor='middle')

    return can_usa_plt
def create_distribution_figure(df, by_class, size=175):
    """
    Create an altair chart for each column in a dataframe.
    Optionally, plot distribution by a target class.

    Columns are laid out three per row. The "target" column is drawn as a bar
    chart of label counts; every other column is drawn with
    ``create_grouped_kde``.

    Parameters
    ----------
    df : pandas DataFrame
        Must contain a "target" column.
    by_class : boolean
        Whether to plot data by class
    size : integer
        Size (width & height) of the returned plot

    Returns
    -------
    Altair chart
    """
    plot_rows = alt.vconcat(data=df)
    n_cols = 3

    for i in range(0, len(df.columns), n_cols):
        plot_cols = alt.hconcat()

        for df_col in df.columns[i:i + n_cols]:
            if df_col != "target":
                plot_cols |= create_grouped_kde(df, df_col, "target",
                                                by_class, size=size)
                continue

            target_labels = df.target.unique()
            # Name the columns explicitly. Renaming the reset_index output
            # relied on pandas < 2.0 column names and yields two "count"
            # columns on newer versions.
            trg = (df.target.value_counts()
                   .rename_axis("label")
                   .reset_index(name="count"))
            cht = alt.Chart(trg).mark_bar(
                stroke="black", strokeWidth=3,
            ).encode(
                x=alt.X("label:O", title="target"),
                y=alt.Y("count:Q", title="Count"),
                color=alt.Color(
                    "label:O",
                    scale=alt.Scale(
                        domain=target_labels,
                        range=get_target_colors(target_labels)),
                    legend=None),
                tooltip=["label", "count"],
            ).properties(width=size, height=size, title="target")
            plot_cols |= cht

        plot_rows &= plot_cols

    chart = plot_rows.configure_legend(
        orient="top", titleFontSize=10, labelFontSize=10,
    ).configure_title(
        fontSize=14, anchor="middle",
    ).configure_axis(grid=False, labelAngle=0)

    return chart
def single_figure(text, o_html, full_pds):
    """Write an interactive PHATE scatter figure to ``o_html``.

    When ``full_pds`` has a 'variable' column, a dropdown-driven panel is
    built for the categorical variables and/or the numerical variables,
    depending on which values occur in its 'dtype' column. Each dropdown
    starts on the longest variable name. If both panels exist they are placed
    side by side; with neither, the plain scatter plot is saved.

    Args:
        text: title text (see the review note in the body).
        o_html: output path handed to ``Chart.save``.
        full_pds: frame with 'PHATE1' and 'PHATE2' columns and, optionally,
            'variable' and 'dtype' columns.
    """
    subtext = ['Parameters:']
    tooltip = ['sample_name', 'PHATE1', 'PHATE2']
    circ = alt.Chart(full_pds).mark_point(size=20).encode(x='PHATE1:Q',
                                                          y='PHATE2:Q')

    def dropdown_panel(scatter, kind, selection_name, encoding_type):
        # Build one panel whose dropdown lists the variables of this kind.
        rows = full_pds.loc[full_pds['dtype'] == kind]
        named = [x for x in rows['variable'] if str(x) != 'nan']
        longest_first = sorted(named, key=lambda x: -len(x))
        dropdown = alt.binding_select(options=rows['variable'].unique(),
                                      name='variable:')
        picker = alt.selection_single(fields=['variable'],
                                      bind=dropdown,
                                      name=selection_name,
                                      init={'variable': longest_first[0]})
        return make_subplot(scatter, picker, list(tooltip), encoding_type)

    has_cats = False
    has_nums = False
    if 'variable' in full_pds.columns:
        dtypes_set = set(full_pds['dtype'])
        if 'categorical' in dtypes_set:
            cats_plot = dropdown_panel(circ, 'categorical',
                                       "categorical variable", 'N')
            has_cats = True
        if 'numerical' in dtypes_set:
            nums_plot = dropdown_panel(circ, 'numerical',
                                       "numerical variable", 'Q')
            has_nums = True

    # NOTE(review): this title is assembled but never attached to the chart,
    # and `subtext` never changes, so the subtitle branch cannot run. Confirm
    # whether the title should be applied.
    title = {
        "text": text,
        "color": "black",
    }
    if subtext != ['Parameters:']:
        title.update({
            "subtitle": (subtext + ["(based on altair)"]),
            "subtitleColor": "grey"
        })

    if has_nums and has_cats:
        circ = alt.hconcat(cats_plot, nums_plot)
    elif has_nums:
        circ = nums_plot
    elif has_cats:
        circ = cats_plot

    circ.save(o_html)
    print('-> Written:', o_html)
def get_interactive_proportions_plot(gender_balance):
    """Return HTML for the linked line chart and bar chart of job proportions.

    Args:
        gender_balance: key into the module-level ``data_frames`` mapping; it
            is also inserted into the bar chart title.

    Returns:
        The chart rendered to an HTML string. The left panel shows the
        proportion of women per year for each job, with a hover read-out; the
        right panel ranks jobs by 'total_prop_female' and its multi-selection
        filters the lines on the left.
    """
    source = data_frames[gender_balance]

    # Selection made on the bar chart; it filters the line chart.
    picked_jobs = alt.selection(type="multi", encodings=['x'])

    lines = alt.Chart(source).mark_line().encode(
        alt.X('year:O', title='Year'),
        alt.Y('female_prop:Q',
              title="Proportion of Women",
              axis=alt.Axis(format='%'),
              scale=alt.Scale(domain=[0, 1])),
        alt.Color('job:N', legend=None),
    )
    lines = lines.transform_filter(picked_jobs).properties(
        width=500, height=375, title="Proportion of Women by Year")

    # Hover selection: snaps to the nearest x value and is empty by default.
    hovered = alt.selection_single(
        encodings=['x'],
        on='mouseover',
        nearest=True,
        empty='none',
    )

    percent_text = alt.Text('female_prop:Q', format='.2%')
    year_rule = alt.Chart().mark_rule(color='#aaa').encode(
        x='year:O').transform_filter(hovered)
    hover_dots = lines.mark_circle().encode(
        opacity=alt.condition(hovered, alt.value(1), alt.value(0)),
    ).add_selection(hovered)
    # White outline drawn underneath the label so it stays readable.
    label_halo = lines.mark_text(
        align='left', dx=5, dy=-5, stroke='white', strokeWidth=2,
    ).encode(text=percent_text).transform_filter(hovered)
    label_text = lines.mark_text(
        align='left', dx=5, dy=-5,
    ).encode(text=percent_text).transform_filter(hovered)

    line_panel = alt.layer(
        lines, year_rule, hover_dots, label_halo, label_text,
        data=source)

    bars = alt.Chart(source).mark_bar(size=30).encode(
        y=alt.Y('job:N',
                title='',
                sort=alt.EncodingSortField(field="total_prop_female",
                                           op="sum",
                                           order="descending")),
        x=alt.X('total_prop_female:Q',
                title="Proportion of Women",
                axis=alt.Axis(format='%')),
        color=alt.condition(picked_jobs,
                            alt.Color('job:N', legend=None),
                            alt.ColorValue("grey")),
    )
    bars = bars.properties(
        width=250,
        height=375,
        title="Jobs by Proportion of Women (For the 10 most " +
        gender_balance + " jobs)",
    ).add_selection(picked_jobs)

    figure = alt.hconcat(line_panel, bars)
    figure = figure.resolve_legend(color="independent", size="independent")
    figure = figure.configure_axis(labelFontSize=13, titleFontSize=14)

    # Render into memory rather than onto disk.
    html_buffer = io.StringIO()
    figure.save(html_buffer, 'html')
    return html_buffer.getvalue()
x='x', y='y' ).transform_filter( pts.ref() ).properties( width=300, height=300 ) # right panel: histogram mag = alt.Chart().mark_bar().encode( x='mbin:N', y="count()", color=alt.condition(pts, alt.value("black"), alt.value("lightgray")) ).properties( selection=pts, width=300, height=300 ) # build the chart: alt.hconcat( points, mag, data=source ).transform_bin( "mbin", field="m", bin=alt.Bin(maxbins=20) )
"""
World Projections
-----------------
This example shows a map of the countries of the world using four available
geographic projections. For more details on the projections available in
Altair, see https://vega.github.io/vega-lite/docs/projection.html
"""
# category: maps
import altair as alt
from vega_datasets import data

source = alt.topo_feature(data.world_110m.url, 'countries')

# Un-projected template; every map below is a projected copy of it.
base = alt.Chart(source).mark_geoshape(fill='#666666', stroke='white')
base = base.properties(width=300, height=180)

projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic']

charts = []
for projection_name in projections:
    projected = base.project(projection_name)
    charts.append(projected.properties(title=projection_name))

# Two maps per row, two rows.
alt.vconcat(
    alt.hconcat(*charts[:2]),
    alt.hconcat(*charts[2:])
)