Esempio n. 1
0
def _build_all_charts(vis_data, sample_name=''):
    """Assemble every summary chart into one three-row Altair figure.

    Args:
        vis_data: Mapping with the pre-computed inputs of each chart. The keys
            read here are 'variant_type_counts', 'depth_histogram',
            'qual_histogram', 'gq_histogram', 'vaf_histograms_by_genotype',
            'base_changes', 'indel_sizes' and 'titv_counts'.
        sample_name: Title shown above the combined figure.

    Returns:
        The vertically concatenated chart with header/title fonts configured.
    """
    # First row: variant types followed by the depth, qual and gq histograms.
    # Each chart keeps its own colour scale.
    top_row = alt.hconcat(
        _build_type_chart(vis_data['variant_type_counts']),
        _build_depth_histogram(vis_data['depth_histogram']),
        _build_qual_histogram(vis_data['qual_histogram']),
        _build_gq_histogram(vis_data['gq_histogram']),
    ).resolve_scale(color='independent')

    # Second row: the helper returns two VAF charts that sit side by side.
    main_vaf, remaining_vaf = _build_vaf_histograms(
        vis_data['vaf_histograms_by_genotype'])
    middle_row = alt.hconcat(main_vaf, remaining_vaf)

    # Third row: displayed as base changes, Ti/Tv, then indel sizes.
    base_changes = _build_base_change_chart(vis_data['base_changes'])
    indel_sizes = _build_indel_size_chart(vis_data['indel_sizes'])
    titv = _build_tt_chart(vis_data['titv_counts'])
    bottom_row = alt.hconcat(base_changes, titv, indel_sizes) \
        .resolve_scale(color='independent')

    stacked = alt.vconcat(top_row, middle_row, bottom_row)
    titled = stacked.properties(title=sample_name, spacing=70)
    with_headers = titled.configure_header(labelFontSize=16, titleFontSize=20)
    return with_headers.configure_title(fontSize=20)
def plot_line_by_key_altair(ds, key, title_fn=lambda x: '', cmap='viridis', c_sort="descending", c_title=''):
    """Draw a 2x3 grid of vertical line profiles coloured by ``key``.

    Args:
        ds: Dataset handed to ``_to_plottable_dataframe`` and to ``title_fn``.
        key: Field used for the colour encoding of every panel.
        title_fn: Callable receiving ``ds`` and returning the figure title.
        cmap: Name of the colour scheme.
        c_sort: Sort order of the colour scale.
        c_title: Legend title; falls back to ``key`` when empty.

    Returns:
        A vertical concatenation of two rows holding three panels each.
    """
    legend_title = c_title if c_title else key

    df = _to_plottable_dataframe(ds)

    # The height axis and the colour encoding are shared by all six panels.
    height_axis = alt.Y('z', axis=alt.Axis(title='z (m)'))
    line_color = alt.Color(
        key,
        scale=alt.Scale(scheme=cmap),
        sort=c_sort,
        legend=alt.Legend(title=legend_title),
    )

    # (panel letter, plotted field, x-axis unit, panel title)
    panel_specs = (
        ('a', 'QT', 'g/kg', 'Total Water'),
        ('b', 'SLI', 'K', 'Liquid-Ice Static Energy'),
        ('c', 'Q1', 'K/day', 'Average Q₁'),
        ('d', 'Q2', 'g/kg/day', 'Average Q₂'),
        ('e', 'Q1NN', 'K/day', 'Q₁ Prediction'),
        ('f', 'Q2NN', 'g/kg/day', 'Q₂ Prediction'),
    )

    panels = [
        alt.Chart(df, width=150)
        .mark_line()
        .encode(alt.X(field, axis=alt.Axis(title=unit)),
                height_axis,
                line_color,
                order='z')
        .properties(title=f'{letter}) {label}')
        for letter, field, unit, label in panel_specs
    ]

    upper = alt.hconcat(*panels[:3])
    lower = alt.hconcat(*panels[3:])
    return alt.vconcat(upper, lower, title=title_fn(ds))
Esempio n. 3
0
def summarize_vgfs(input_file,
                   output_dir,
                   groupby_column='scaffold',
                   max_auxiliary_score=3,
                   remove_transposons=False,
                   remove_fs=False):
    """Summarise viral annotations into statistics, an AMG table and a heatmap.

    Writes three files into ``output_dir``: 'vMAG_stats.tsv',
    'amg_summary.tsv' and 'product.html'. Progress is printed to stdout, each
    line prefixed with the time elapsed since the call started.

    Args:
        input_file: Tab-separated annotations file (first column is the index).
        output_dir: Directory to create and fill with the outputs.
        groupby_column: Annotation column used to group genes.
        max_auxiliary_score: Forwarded to ``filter_to_amgs`` as ``max_aux``.
        remove_transposons: Forwarded to ``filter_to_amgs``.
        remove_fs: Forwarded to ``filter_to_amgs``.

    Raises:
        ValueError: If the database locations lack 'genome_summary_form'.
    """
    started = datetime.now()

    def report(template):
        # Fill the single %s placeholder with the elapsed wall-clock time.
        print(template % str(datetime.now() - started))

    # Load inputs and make sure the summary form is configured.
    annotations = pd.read_csv(input_file, sep='\t', index_col=0)
    db_locs = get_database_locs()
    if 'genome_summary_form' not in db_locs:
        raise ValueError(
            'Genome summary form location must be set in order to summarize genomes'
        )
    mkdir(output_dir)
    genome_summary_form = pd.read_csv(db_locs['genome_summary_form'],
                                      sep='\t',
                                      index_col=0)
    report('%s: Retrieved database locations and descriptions')

    # Narrow the annotations down to potential AMGs.
    potential_amgs = filter_to_amgs(annotations.fillna(''),
                                    max_aux=max_auxiliary_score,
                                    remove_transposons=remove_transposons,
                                    remove_fs=remove_fs)
    report('%s: Determined potential amgs')

    # Distillate: per-genome statistics followed by the AMG summary table.
    stats_path = path.join(output_dir, 'vMAG_stats.tsv')
    make_viral_stats_table(annotations, potential_amgs,
                           groupby_column).to_csv(stats_path, sep='\t')
    report('%s: Calculated viral genome statistics')

    summary_path = path.join(output_dir, 'amg_summary.tsv')
    make_viral_distillate(potential_amgs, genome_summary_form).to_csv(
        summary_path, sep='\t', index=None)
    report('%s: Generated AMG summary')

    # Liquor: AMG count column next to the functional heatmap.
    vgf_order = make_vgf_order(potential_amgs)
    amg_column = make_amg_count_column(potential_amgs, vgf_order)
    viral_function_df = make_viral_functional_df(potential_amgs,
                                                 genome_summary_form,
                                                 groupby_column=groupby_column)
    functional_heatmap = make_viral_functional_heatmap(viral_function_df,
                                                       vgf_order)
    product = alt.hconcat(amg_column, functional_heatmap, spacing=5)
    product.save(path.join(output_dir, 'product.html'))
    report('%s: Generated product heatmap')
    report("%s: Completed distillation")
Esempio n. 4
0
def outcome_summary(df,
                    c_lim=0,
                    d_lim=0,
                    ao_lim=110,
                    ao_base=True,
                    tcase=False,
                    color='black',
                    hspacing=20,
                    vspacing=20,
                    **kwargs):
    """Build a 2x2 summary of cases, deaths, activity and output.

    Args:
        df: Data indexed by date with 'c' (or 'ki'), 'd', 'act' and 'out'
            entries. NOTE(review): each entry is used as a frame of paths
            (``quantile(..., axis=1)``) and the index is assumed to be named
            'date' — confirm against the caller.
        c_lim: Lower bound for the y-maximum of the cases panel.
        d_lim: Lower bound for the y-maximum of the deaths panel.
        ao_lim: y-maximum of the activity and output panels.
        ao_base: Overlay a dashed constant baseline on activity and output.
        tcase: Read cases from 'ki' instead of 'c'.
        color: Colour of axes, titles and the baseline.
        hspacing: Horizontal spacing between panels.
        vspacing: Vertical spacing between rows.
        **kwargs: Forwarded to every ``path_dist`` call.

    Returns:
        The configured concatenated chart.
    """
    case_column = 'ki' if tcase else 'c'

    # Daily differences scaled to "per million"; activity/output to percent.
    daily_cases = 1e6 * df[case_column].diff()
    daily_deaths = 1e6 * df['d'].diff()
    activity = 1e2 * df['act']
    output = 1e2 * df['out']

    # The y-limit is the largest 90th percentile, but never below the floor.
    cases_top = np.maximum(c_lim, daily_cases.quantile(0.9, axis=1).max())
    deaths_top = np.maximum(d_lim, daily_deaths.quantile(0.9, axis=1).max())

    ch_c = path_dist(daily_cases,
                     title='Daily cases per million',
                     y_max=cases_top,
                     **kwargs)
    ch_d = path_dist(daily_deaths,
                     title='Daily deaths per million',
                     y_max=deaths_top,
                     **kwargs)
    ch_a = path_dist(activity,
                     title='Economic activity (%)',
                     y_max=ao_lim,
                     **kwargs)
    ch_o = path_dist(output,
                     title='Economic output (%)',
                     y_max=ao_lim,
                     **kwargs)

    if ao_base:
        # Constant reference level drawn as a dashed line.
        level = 99.4 * np.ones(len(df))

        def baseline_rule(series):
            frame = pd.DataFrame({'base': level},
                                 index=series.index).reset_index()
            rule = alt.Chart(frame).mark_line(strokeDash=[5, 2],
                                              strokeWidth=1,
                                              color=color)
            return rule.encode(x='date', y='base')

        ch_a += baseline_rule(activity)
        ch_o += baseline_rule(output)

    upper_row = alt.hconcat(ch_c, ch_d, spacing=hspacing)
    lower_row = alt.hconcat(ch_a, ch_o, spacing=hspacing)
    grid = alt.vconcat(upper_row, lower_row, spacing=vspacing)

    return (grid.configure_axisY(minExtent=40, labelFlush=True)
            .configure_axis(domainColor=color,
                            tickColor=color,
                            labelColor=color)
            .configure_title(color=color))
Esempio n. 5
0
def make_liquor_heatmap(module_coverage_frame, etc_coverage_df, function_df, mag_order=None, labels=None):
    """Combine the module, ETC and functional heatmaps into one row.

    The module and ETC heatmaps are always built with the ``mag_order`` that
    was passed in. When ``labels`` is given, the function frame and the order
    are renamed first and the functional heatmap uses the renamed order.

    Returns:
        A horizontal concatenation: (module | ETC) followed by functions.
    """
    coverage_left = make_module_coverage_heatmap(module_coverage_frame, mag_order)
    coverage_right = make_etc_coverage_heatmap(etc_coverage_df, mag_order=mag_order)

    if labels is None:
        function_order = mag_order
    else:
        function_df, function_order = rename_genomes_to_taxa(function_df, labels, mag_order)
    functions = make_functional_heatmap(function_df, function_order)

    # Keep the coverage pair as its own nested concatenation.
    coverage_pair = alt.hconcat(coverage_left, coverage_right)
    return alt.hconcat(coverage_pair, functions)
Esempio n. 6
0
 def histograms(self):
     """
     Produces a histogram for every tag that has been optimised, laid out horizontally to each other.

     "Health" and "Magic" come first, followed by each tag of the first
     chromosome in ``ChromosomeController.nondominatedFront``.

     :return: the produced graph, titled "Histograms"
     """
     allGraphs = self.histogram("Health")
     allGraphs = alt.hconcat(allGraphs, self.histogram("Magic"))
     for (tag, _, _) in ChromosomeController.nondominatedFront[0].tags:
         allGraphs = alt.hconcat(allGraphs, self.histogram(tag))
     # properties() returns a modified copy rather than mutating the chart,
     # so the result has to be returned for the title to take effect.
     return allGraphs.properties(title="Histograms")
Esempio n. 7
0
def explore_dataset(users, movies, ratings):
    """Print summary tables and show interactive rating histograms.

    Two figures are opened through ``altair_viewer``: ratings per user
    (filterable by occupation) and ratings per movie (filterable by genre).

    NOTE(review): relies on DataFrame helpers that are not part of stock
    pandas (``flatten_cols`` and a two-argument ``mask(key, predicate)``);
    presumably they are patched in elsewhere — confirm.

    Args:
        users: User table with at least 'user_id' and 'occupation'.
        movies: Movie table, forwarded to ``get_movie_ratings``.
        ratings: Rating table with 'user_id' and 'rating'.
    """
    # Exploring the MovieLens Data (Users)
    print(users.describe())  # Numeric user features
    # np.object was removed in NumPy 1.24; the builtin ``object`` selects the
    # same (categorical) columns on every NumPy version.
    print(users.describe(include=[object]))

    # Selection used to slice the user histograms by occupation.
    occupation_filter = alt.selection_multi(fields=["occupation"])
    occupation_chart = alt.Chart().mark_bar().encode(
        x="count()",
        y=alt.Y("occupation:N"),
        color=alt.condition(
            occupation_filter,
            alt.Color("occupation:N", scale=alt.Scale(scheme='category20')),
            alt.value("lightgray")),
    ).properties(width=300, height=300, selection=occupation_filter)

    # Per-user rating count and mean, joined with the user features.
    users_ratings = (ratings.groupby('user_id', as_index=False).agg({
        'rating': ['count', 'mean']
    }).flatten_cols().merge(users, on='user_id'))

    altair_viewer.show(
        alt.hconcat(filtered_hist('rating count', '# ratings / user',
                                  occupation_filter),
                    filtered_hist('rating mean', 'mean user rating',
                                  occupation_filter),
                    occupation_chart,
                    data=users_ratings))

    # Exploring the MovieLens Data (Movies)
    movies_ratings = get_movie_ratings(movies, ratings)

    genre_filter, genre_chart = filter_and_chart()

    # These two tables used to be computed and thrown away; print them so the
    # exploration actually shows the most-rated and best-rated movies.
    summary_columns = ['title', 'rating count', 'rating mean']
    print(movies_ratings[summary_columns].sort_values(
        'rating count', ascending=False).head(10))
    print(movies_ratings[summary_columns].mask(
        'rating count',
        lambda x: x > 20).sort_values('rating mean', ascending=False).head(10))

    # Display the number of ratings and average rating per movie
    altair_viewer.show(
        alt.hconcat(filtered_hist('rating count', '# ratings / movie',
                                  genre_filter),
                    filtered_hist('rating mean', 'mean movie rating',
                                  genre_filter),
                    genre_chart,
                    data=movies_ratings))
def plot_results_means():
    """Show side-by-side line charts of mean accuracy/kappa per experiment.

    Twelve CSV logs are read from './logs/sine/' (three drift scenarios, four
    files each). Within a scenario the first file is paired with the third
    and the second with the fourth; every pair is shown as one figure.
    """
    result_names = [[
        'NB_EHmeansIncrementalMixedResults',
        'NB_EHmeansIncrementalSeparatedResults',
        'NB_meansIncrementalMixedResults',
        'NB_meansIncrementalSeparatedResults'
    ],
                    [
                        'NB_EHmeansReocurringMixedResults',
                        'NB_EHmeansReocurringSeparatedResults',
                        'NB_meansReocurringMixedResults',
                        'NB_meansReocurringSeparatedResults'
                    ],
                    [
                        'NB_EHmeansSuddenDriftMixedResults',
                        'NB_EHmeansSuddenDriftSeparatedResults',
                        'NB_meansSuddenDriftMixedResults',
                        'NB_meansSuddenDriftSeparatedResults'
                    ]]

    # Load each log, drop the "current" metrics, rename the remaining
    # columns and reshape to long form.
    frames = []
    for scenario in result_names:
        for name in scenario:
            frame = pd.read_csv('./logs/sine/' + name + '.csv', skiprows=5)
            frame = frame.drop(['current_acc_[M0]', 'current_kappa_[M0]'],
                               axis=1)
            frame.columns = ['id', 'mean_acc', 'mean_kappa']
            frames.append(frame.melt('id', var_name='metrics'))

    # Incremental, reoccurring and sudden-drift groups, four frames each.
    scenarios = [frames[:4], frames[4:8], frames[8:]]

    def line_chart(frame, title):
        return alt.Chart(frame, title=title).mark_line(opacity=0.7).encode(
            x='id',
            y='value',
            color='metrics',
        )

    for names, group in zip(result_names, scenarios):
        for left in range(len(group) - 2):
            right = left + 2
            alt.hconcat(line_chart(group[left], names[left]),
                        line_chart(group[right], names[right])).show()
Esempio n. 9
0
def plot_line_by_key_altair(ds,
                            key,
                            title_fn=lambda x: "",
                            cmap="viridis",
                            c_sort="descending",
                            c_title=""):
    """Make line plots of Q1 and Q2 for different levels of a dataset

    Six panels are drawn and arranged in three columns of two; in the second
    and third column the observed field sits above its prediction and both
    share one x scale.

    Args:
        ds: dataset passed to ``_to_plottable_dataframe`` and ``title_fn``
        key: field used for the colour encoding
        title_fn: callable mapping ``ds`` to the figure title
        cmap: colour scheme name
        c_sort: sort order of the colour scale
        c_title: legend title; defaults to ``key`` when empty

    Returns:
        The horizontally concatenated chart.
    """

    if not c_title:
        c_title = key

    df = _to_plottable_dataframe(ds)

    z = get_pressure_encoding()

    color = alt.Color(key,
                      scale=alt.Scale(scheme=cmap),
                      sort=c_sort,
                      legend=alt.Legend(title=c_title))

    # (panel letter, plotted field, x-axis unit, panel title)
    labels = [
        ("a", "QV", "g/kg", "Water Vapor"),
        ("b", "Q1", "K/day", "Average Q₁"),
        ("c", "Q2", "g/kg/day", "Average Q₂"),
        ("d", "TABS", "K", "Temperature"),
        ("e", "Q1NN", "K/day", "Q₁ Prediction"),
        ("f", "Q2NN", "g/kg/day", "Q₂ Prediction"),
    ]

    # The loop variable is named ``field`` so it no longer overwrites the
    # ``key`` parameter.
    charts = [
        alt.Chart(df, width=150).mark_line().encode(
            alt.X(field, axis=alt.Axis(title=unit)), z, color,
            order="z").properties(title=f"{letter}) {label}")
        for letter, field, unit, label in labels
    ]

    # The unused row-wise concatenations were dropped; only the column
    # layout below is returned.
    cols = [
        alt.vconcat(charts[0], charts[3]),
        alt.vconcat(charts[1], charts[4]).resolve_scale(x='shared'),
        alt.vconcat(charts[2], charts[5]).resolve_scale(x='shared'),
    ]

    return alt.hconcat(*cols, title=title_fn(ds))
Esempio n. 10
0
def umap(z: np.ndarray, d: np.ndarray, lbls: np.ndarray, n_neighbors: int = 8):
    """Plot a 2-D UMAP embedding twice: coloured by label and by log-total.

    Args:
        z: Input handed to ``UMAP.fit_transform`` (cosine metric).
        d: Array whose sum along axis 1 is log1p-transformed and used as the
            colour of the second panel.
        lbls: Per-point labels colouring the first panel.
        n_neighbors: Neighbourhood size passed to UMAP.

    Returns:
        Two 300x300 scatter plots concatenated horizontally.
    """
    embedding = UMAP(n_neighbors=n_neighbors, metric="cosine").fit_transform(z)

    log_total = np.log1p(d.sum(1))
    # Clamp the colour domain to the central 95% of the values.
    lower, upper = np.percentile(log_total, (2.5, 97.5))

    points = pd.DataFrame({
        "x": embedding[:, 0],
        "y": embedding[:, 1],
        "c": lbls,
        "log_d": log_total
    })
    base = alt.Chart(points).properties(height=300, width=300)

    by_label = base.mark_point(opacity=0.3).encode(
        x="x:Q",
        y="y:Q",
        color=alt.Color("c:N", legend=None),
    )

    total_scale = alt.Scale(scheme="viridis",
                            clamp=True,
                            nice=True,
                            domain=(lower, upper))
    by_total = base.mark_point(opacity=0.8).encode(
        x="x:Q",
        y="y:Q",
        color=alt.Color("log_d:Q", scale=total_scale),
    )

    return alt.hconcat(by_label, by_total)
Esempio n. 11
0
 def barplot_all(self,
                 var_l,
                 y_scale=None,
                 y_max_check=False,
                 default_col="test"):
     """
     Build one bar chart per ``env`` entry of ``self.results_flat``.

     :param var_l: columns to plot; when empty, every column whose name
         contains ``default_col`` is used
     :param y_scale: optional (min, max) domain of the y axis
     :param y_max_check: when set together with ``y_scale``, the upper bound
         is replaced by the data maximum plus 0.1 (rounded to 2 decimals)
     :param default_col: substring selecting the default columns
     :return: the concatenated chart as a dictionary (``to_dict()``)
     """
     if not var_l:
         # No explicit selection: fall back to all matching columns.
         var_l = [
             name for name in self.results_flat.columns
             if default_col in name
         ]

     selected = self.results_flat[var_l + ["env"]].fillna("NaN")
     long_form = selected.reset_index().melt(["env", "index"])

     if not y_scale:
         y_bar = "value:Q"
     else:
         domain = y_scale
         if y_max_check:
             y_max = round(self.results_flat[var_l].max().max() + 0.1, 2)
             domain = (y_scale[0], y_max)
         y_bar = alt.Y("value:Q", scale=alt.Scale(domain=domain))

     bars = alt.Chart(long_form).mark_bar().encode(
         y=y_bar, x="variable:N", color="env:N")
     bars = bars.properties(width=400)

     chart = alt.hconcat().properties(background="white")
     for env in list(self.results_flat.env):
         if env == "N/A":
             continue
         # One panel per environment, filtered from the shared base chart.
         chart |= bars.transform_filter(alt.expr.datum.env == env)
     return chart.to_dict()
Esempio n. 12
0
def show_covid_feature_relationship(group_dict, sub_feature_list):
    """Render a feature-by-feature grid of bar charts in Streamlit.

    Each row uses the data frame stored under one feature in ``group_dict``;
    each column plots another feature against it. Axis titles are only shown
    on the first column (y title) and on the last row (x title).

    Args:
        group_dict: Mapping from feature name to the frame plotted in its row.
        sub_feature_list: Ordered feature names defining rows and columns.
    """
    st.write(
        "You are showing the relationship between **{}**, for each graph, the whole data points are separated into two bars according to the feature along the y-axis, and each bar is separated into two colors by the feature along the x-axis"
        .format(', '.join(sub_feature_list)))

    grid = alt.vconcat()
    for row_feature in sub_feature_list:
        row_data = group_dict[row_feature]
        is_last_row = row_feature == sub_feature_list[-1]

        row = alt.hconcat()
        for col_feature in sub_feature_list:
            is_first_col = col_feature == sub_feature_list[0]
            # Title of the vertical axis: the row feature, first column only.
            y_axis_title = row_feature if is_first_col else None
            # Title of the horizontal axis: colour legend hint, last row only.
            x_axis_title = "Color: " + col_feature if is_last_row else None

            cell = alt.Chart(row_data).mark_bar().encode(
                alt.X(row_feature + ' type:N', title=x_axis_title),
                alt.Y(col_feature + ':Q', title=y_axis_title),
                alt.Color(col_feature + ' type:N'),
                alt.Tooltip([col_feature + ':Q', col_feature + ' type:N']),
            ).properties(width=150, height=150)
            row |= cell
        grid &= row
    st.altair_chart(grid)
Esempio n. 13
0
def simple_map(data,
               clabel=None,
               projection='albersUsa',
               clim=None,
               cmap='reds'):
    """Draw a single 500x300 map of ``data`` through ``carto``.

    Args:
        data: Gridded array exposing 'lat' and 'lon' coordinates plus
            ``.values`` (NOTE(review): looks like an xarray DataArray on a
            2-D grid — confirm).
        clabel: Colour-bar label forwarded to ``carto``.
        projection: Map projection name forwarded to ``carto``.
        clim: Optional (low, high) colour limits. Cells at or below ``low``
            are dropped. When omitted, every cell is kept.
        cmap: Colour scheme forwarded to ``carto``.

    Returns:
        A horizontal concatenation holding the one map.
    """
    lat = data['lat'].values.flatten()
    lon = data['lon'].values.flatten()
    color = data.values.flatten()

    # The default ``clim=None`` used to crash on ``clim[0]``. Without limits
    # there is no threshold, so select everything.
    # NOTE(review): ``carto`` still receives clim=None in that case — confirm
    # that it accepts it.
    inds = slice(None) if clim is None else color > clim[0]

    # Mark size scaled to the grid resolution so cells tile the 500x300 view.
    shape = data['lat'].shape
    size = (300 / shape[0]) * (500 / shape[1]) * 0.9

    row = alt.hconcat()

    row |= carto(
        lat=lat[inds],
        lon=lon[inds],
        color=color[inds],
        clim=clim,
        cmap=cmap,
        clabel=clabel,
        size=size,
        width=500,
        height=300,
        projection=projection,
    )

    return row
Esempio n. 14
0
def createChart(data):
    """Build a bar chart of the mean restaurant count per cuisine.

    Bars are sorted by descending mean and drawn in light grey. Two single
    selections are attached: "highlight" (on mouseover) and "rating"
    (on the y encoding).

    Args:
        data: Table with 'restaurants' and 'cuisine' columns.

    Returns:
        A horizontal concatenation containing the bar chart, bound to ``data``.
    """
    # The unused ``color_expression`` / ``maxCount`` locals and the disabled
    # colour-condition leftovers were removed; they did not affect the chart.
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")
    rating_selection = alt.selection_single(name="rating",
                                            empty="all",
                                            encodings=['y'])

    barMean = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
            alt.X("mean(restaurants):Q", axis=alt.Axis(title="Restaurants")),
            alt.Y('cuisine:O', axis=alt.Axis(title="Cuisine"),
                  sort=alt.SortField(field="restaurants", op="mean", order='descending')),
            alt.ColorValue("LightGrey"),
        ).properties(
            width=200,
            height=350,
            selection=highlight_selection + rating_selection,
        )

    return alt.hconcat(barMean, data=data)
Esempio n. 15
0
def _draw_heatmap_by_substance_name(df,
                                    substance_name: str) -> alt.HConcatChart:
    """Draw three heat maps (all, male, female) for one substance.

    Every heat map shows the mean 'substance_amount' per age group and
    month-year of death; the three panels share their y axis.

    Args:
        df: Table with 'substance_name', 'sex', 'age_group',
            'year_month_of_death' and 'substance_amount' columns.
        substance_name: Substance to filter on, or 'ALL_SUBSTANCES' to use
            the whole table.

    Returns:
        The horizontally concatenated heat maps.
    """
    if substance_name == 'ALL_SUBSTANCES':
        selected = df
    else:
        # Boolean indexing rather than an f-string query: a name containing a
        # quote character would otherwise break (or alter) the expression.
        selected = df[df['substance_name'] == substance_name]

    def mean_amounts(frame):
        # Mean substance amount per age group and month-year of death.
        groups = frame.groupby(['age_group', 'year_month_of_death'])
        return groups['substance_amount'].mean().reset_index(
            name=color_coding_label)

    # Sharing the y axis between all panels of the row.
    heat_maps = alt.hconcat().resolve_scale(y='shared')
    heat_maps |= _get_heatmap(mean_amounts(selected), substance_name)

    for sex_code, sex_label in (("M", "Male"), ("F", "Female")):
        by_sex = selected[selected['sex'] == sex_code]
        heat_maps |= _get_heatmap(mean_amounts(by_sex), substance_name,
                                  sex_label)

    return heat_maps
def make_chart_organisational_diversity(
    org_coeffs,
    num_orgs,
    metric_params,
    org_type_lookup,
    paper_counts,
    save=True,
    fig_num=14,
):
    """Plot comparing the organisational diversity coefficients

    The left panel shows the ``num_orgs`` organisations with the smallest
    'beta' as bars with error bars ('lower' to 'upper'); the right panel shows
    the paper counts of the same organisations in the same order.

    Args:
        org_coeffs: Mapping from metric to a coefficient table containing
            'beta', 'lower' and 'upper', indexed by organisation.
        num_orgs: Number of organisations to keep.
        metric_params: Key selecting the table inside ``org_coeffs``.
        org_type_lookup: Mapping from organisation to organisation type.
        paper_counts: Series of paper counts indexed by organisation.
        save: When exactly ``True``, the figure is saved via ``save_altair``.
        fig_num: Number used in the saved file name.

    Returns:
        The combined chart.
    """

    # Regression coefficients sorted
    selected = (org_coeffs[metric_params].sort_values("beta").head(
        n=num_orgs).reset_index(drop=False))

    selected["org_type"] = selected["index"].map(org_type_lookup)
    selected["order"] = range(0, len(selected))

    # Paper counts by organisation
    recent_papers_orgs = (paper_counts.loc[selected["index"]].reset_index(
        name="papers").rename(columns={"index": "org"}))
    recent_papers_orgs["order"] = range(0, len(recent_papers_orgs))
    recent_papers_orgs["org_type"] = recent_papers_orgs["org"].map(
        org_type_lookup)

    b_ch = (alt.Chart(selected).mark_bar().encode(
        y=alt.Y("index", sort=alt.EncodingSortField("order"), title=""),
        x=alt.X("beta", title="Coefficient on diversity"),
        # The colour channel was built with alt.X by mistake; alt.Color is
        # the matching channel class.
        color=alt.Color("org_type", title="Organisation type"),
    )).properties(width=150, height=600)

    b_err = (alt.Chart(selected).mark_errorbar().encode(
        y=alt.Y(
            "index",
            sort=alt.EncodingSortField("order"),
            title="",
            axis=alt.Axis(ticks=False, labels=False),
        ),
        x=alt.X("lower", title=""),
        x2="upper",
    )).properties(width=150, height=600)

    b_act = (alt.Chart(recent_papers_orgs).mark_bar().encode(
        y=alt.Y(
            "org",
            title=None,
            sort=alt.EncodingSortField("order"),
            axis=alt.Axis(labels=False, ticks=False),
        ),
        x=alt.X("papers"),
        color="org_type",
    )).properties(width=100, height=600)

    out = (b_ch + b_err).resolve_scale(y="independent")
    out_2 = alt.hconcat(out, b_act, spacing=0).resolve_scale(y="shared")

    if save is True:
        # NOTE(review): ``driv`` is not defined in this function; presumably a
        # module-level web driver — confirm it exists where this is called.
        save_altair(out_2, f"fig_{fig_num}_comp", driv)

    return out_2
Esempio n. 17
0
    def _plots(self):
        """Build the two-panel non-time plot and report how long it took.

        Returns:
            A two-element list holding the same configured chart twice.
        """
        tic = time.time()
        with st.spinner('Generating Plots'):
            # Left panel: T_diff vs T_diff_eff; right panel: solar vs geo.
            left_panel = self.plotNonTime('T_diff', 'T_diff_eff')
            left_panel = left_panel.properties(width=self.def_width)
            right_panel = self.plotNonTime('solar_w', 'geo_tot_w')
            right_panel = right_panel.properties(width=self.def_width)
            plot = alt.hconcat(left_panel, right_panel)

        plot = plot.configure_axis(labelFontSize=self.label_font_size,
                                   titleFontSize=self.title_font_size,
                                   titlePadding=41,
                                   domain=False)
        plot = plot.configure_legend(labelFontSize=self.label_font_size,
                                     titleFontSize=self.title_font_size)
        plot = plot.configure_view(cornerRadius=2)

        # Timing row for the message table.
        timing_row = [
            F"{'Altair plot gen:': <20}", F"{time.time() - tic:.2f} s"
        ]
        message(timing_row, tbl=self.mssg_tbl, mssgType='TIMING')

        return [plot, plot]
Esempio n. 18
0
def createChart(data, name=''):
    """Build a bar chart of 'rating' per 'name', highlighting on mouseover.

    Args:
        data: Table with 'rating' and 'name' columns.
        name: Text inserted into the figure title.

    Returns:
        A horizontal concatenation containing the bar chart, bound to ``data``.
    """
    # The bar under the cursor is the one whose id matches the selection.
    hovered_fill = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    hover = alt.selection_single(name="highlight",
                                 empty="all",
                                 on="mouseover")

    x_channel = alt.X("rating:Q",
                      axis=alt.Axis(title="The number of restaurants"))
    y_channel = alt.Y('name:O',
                      axis=alt.Axis(title="Cuisines"),
                      sort=alt.SortField(field="rating",
                                         op="mean",
                                         order='descending'))
    fill = alt.ColorValue("LightGrey", condition=hovered_fill)

    bars = alt.Chart().mark_bar(stroke="Black")
    bars = bars.encode(x_channel, y_channel, fill)
    bars = bars.properties(selection=hover)

    figure_title = "The number of restaurants ({} in NYC) - Top 25 cuisines".format(
        name)
    return alt.hconcat(bars, data=data, title=figure_title)
Esempio n. 19
0
def tv_linkedScatterPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Build two scatter plots linked through one interval selection.

    Both panels plot the renamed 'plotY' column on the y axis; the left panel
    uses 'plotX1' and the right panel 'plotX2' on the x axis. Points inside
    the brushed interval are coloured by 'anfreq_label', the rest are grey.

    Args:
        data: Table with 'plotY', 'plotX1', 'plotX2' and 'anfreq_label'.
        engine: Unused by this function.
        xlabel: New name of 'plotY' (shown on the y axis).
        ylabel1: New name of 'plotX1' (x axis of the left panel).
        ylabel2: New name of 'plotX2' (x axis of the right panel).

    Returns:
        The two panels concatenated horizontally without spacing.
    """
    # rename() returns a new frame, so the caller's data stays untouched.
    renamed = data.rename(columns={
        'plotY': xlabel,
        'plotX1': ylabel1,
        'plotX2': ylabel2
    })
    brush = alt.selection(type='interval', encodings=['x', 'y'])
    points = alt.Chart(renamed).mark_point()

    def panel(x_field, y_channel):
        shading = alt.condition(brush, 'anfreq_label', alt.value('lightgray'))
        encoded = points.encode(x=x_field, y=y_channel, color=shading)
        return encoded.properties(selection=brush, width=260, height=300)

    left = panel(
        ylabel1,
        alt.Y('{0}:Q'.format(xlabel), axis=alt.Axis(format='~s')))
    # The right panel hides its y axis labels and title; it sits flush
    # against the left one.
    right = panel(
        ylabel2,
        alt.Y('{0}:Q'.format(xlabel), title='', axis=alt.Axis(labels=False)))

    return alt.hconcat(left, right, spacing=0)
Esempio n. 20
0
def get_chart(keyword):
    """Return the JSON spec of a three-panel dashboard for ``keyword``.

    The dashboard shows a row count over time, article counts per site and
    the 24 top-ranked words by summed ``n_w``.

    Args:
        keyword: Text that must occur in an article's ``clean_text``; the
            literal "*" selects every article.

    Returns:
        The Altair chart serialised with ``to_json()``.
    """
    db_string = "postgres://*****:*****@postgres:5432/shared"
    base_query = "select article_id,string_date,site,palabra,n_w from tb_news_covid_mexico_palabras_top_tfidf"
    if keyword == "*":
        statement = sqlalchemy.text(base_query)
    else:
        # The keyword is sent as a bound parameter instead of being spliced
        # into the SQL text, which closes the injection hole.
        statement = sqlalchemy.text(
            base_query +
            " where article_id in (select article_id from tb_news_covid_mexico_date_text where clean_text LIKE :pattern )"
        ).bindparams(pattern="%" + keyword + "%")

    db = create_engine(db_string)
    try:
        df = pd.read_sql_query(statement, db)
    finally:
        # Release the connection pool created for this call.
        db.dispose()

    chart3 = alt.Chart(df).mark_point().encode(
        y='count()', x='string_date:T').properties(width=900).interactive()

    chart1 = alt.Chart(df).mark_bar().encode(
        x=alt.X('count(article_id):Q'),
        y=alt.Y("site:N",
                sort=alt.EncodingSortField(
                    field="site", op="count",
                    order="descending"))).transform_aggregate(
                        groupby=["article_id", "site"]).properties(height=800)

    chart2 = alt.Chart(df).mark_bar().encode(
        x=alt.X('freq_palabras:Q', aggregate="sum"),
        y=alt.Y(
            "palabra",
            sort=alt.EncodingSortField(
                field="freq_palabras", op="sum",
                order="descending"))).transform_aggregate(
                    freq_palabras='sum(n_w)',
                    groupby=["palabra"],
                ).transform_window(
                    rank='row_number()',
                    sort=[alt.SortField("freq_palabras", order="descending")],
                ).transform_filter(
                    (alt.datum.rank < 25)).properties(height=800)

    return alt.vconcat(chart3, alt.hconcat(chart1, chart2)).to_json()
Esempio n. 21
0
def plot_class_report(learn):
    """Plot one labelled bar chart per entry of the class report.

    Every entry of ``class_report(learn)`` becomes a chart whose bars are the
    entry's metrics (all keys except 'support'); the 'support' value is shown
    in the chart title. Enables the Altair 'notebook' renderer as a side
    effect.

    Returns:
        All charts concatenated horizontally (pairwise, left to right).
    """
    alt.renderers.enable('notebook')
    report = class_report(learn)

    panels = []
    for class_name in report.keys():
        scores = report[class_name]

        # Everything except the support count is drawn as a bar.
        metric_names = [metric for metric in scores if metric != 'support']
        metric_values = [scores[metric] for metric in metric_names]
        support_text = ''.join(
            str(scores[metric]) for metric in scores if metric == 'support')
        panel_title = class_name + ', Support: ' + support_text

        frame = pd.DataFrame({'x': metric_names, 'y': metric_values})

        bars = alt.Chart(frame, width=200).mark_bar(size=30).encode(
            x=alt.X("x", axis=alt.Axis(labelAngle=0, title='')),
            y=alt.Y('y',
                    axis=alt.Axis(title=''),
                    scale=alt.Scale(domain=(0, 1))),
        )
        # Value labels sitting just above each bar.
        value_labels = alt.Chart(frame).mark_text(
            baseline='bottom', dy=-1).encode(
                x='x', y='y', text=alt.Text('y', format='.2f'))

        panels.append((bars + value_labels).properties(title=panel_title))

    return reduce(alt.hconcat, panels)
Esempio n. 22
0
def save_plot_movies(name):
    """Save the per-movie rating histograms, filterable by genre.

    Reads the module-level ``movies`` and ``ratings`` tables.
    NOTE(review): ``flatten_cols`` is not a stock pandas method; presumably
    it is patched onto DataFrame elsewhere — confirm.

    Args:
        name: Output path handed to the chart's ``save`` method.
    """
    # Rating count and mean per movie, joined onto the movie table.
    per_movie = ratings.groupby('movie_id', as_index=False).agg({
        'rating': ['count', 'mean']
    }).flatten_cols()
    movies_ratings = movies.merge(per_movie, on='movie_id')

    genre_filter = alt.selection_multi(fields=['genre'])
    genre_chart = alt.Chart().mark_bar().encode(
        x="count()",
        y=alt.Y('genre'),
        color=alt.condition(genre_filter, alt.Color("genre:N"),
                            alt.value('lightgray'))).properties(
                                height=300, selection=genre_filter)

    # Two top-10 tables were computed here and immediately discarded; they
    # had no effect on the saved plot and were removed.

    plot = alt.hconcat(filtered_hist('rating count', '# ratings / movie',
                                     genre_filter),
                       filtered_hist('rating mean', 'mean movie rating',
                                     genre_filter),
                       genre_chart,
                       data=movies_ratings)

    plot.save(name)
def movie_embedding_norm(models):
    """Visualizes the norm and number of ratings of the movie embeddings.

    One scatter plot (number of ratings vs. embedding norm) is drawn per
    model. All plots share an interval brush, and each shows the title of the
    movie nearest to the cursor. Reads the module-level ``movies`` and
    ``movies_ratings`` tables.

    Args:
      models: A model, or a list of models, exposing
        ``embeddings["movie_id"]``.

    Returns:
      Whatever ``altair_viewer.show`` returns for the combined chart.
    """
    model_list = models if isinstance(models, list) else [models]

    df = pd.DataFrame({
        'title': movies['title'],
        'genre': movies['genre'],
        'num_ratings': movies_ratings['rating count'],
    })

    # A single brush is shared by every panel.
    brush = alt.selection_interval()

    def build_panel(norm_column):
        # Each panel gets its own hover selection for the title label.
        nearest = alt.selection(
            type='single', encodings=['x', 'y'], on='mouseover', nearest=True,
            empty='none')
        dots = alt.Chart().mark_circle().encode(
            x='num_ratings',
            y=norm_column,
            color=alt.condition(brush, alt.value('#4c78a8'),
                                alt.value('lightgray')),
        ).properties(selection=nearest).add_selection(brush)
        labels = alt.Chart().mark_text(align='center', dx=5, dy=-5).encode(
            x='num_ratings',
            y=norm_column,
            text=alt.condition(nearest, 'title', alt.value('')))
        return alt.layer(dots, labels)

    charts = []
    for position, model in enumerate(model_list):
        norm_column = 'norm' + str(position)
        df[norm_column] = np.linalg.norm(model.embeddings["movie_id"], axis=1)
        charts.append(build_panel(norm_column))

    return altair_viewer.show(alt.hconcat(*charts, data=df))
Esempio n. 24
0
def make_figure(x_axis, y_axis):
    """Render a brushable scatter plot next to a horsepower histogram as HTML.

    `x_axis` and `y_axis` are passed unchanged to the scatter plot's `x` and
    `y` encodings. Returns the HTML text of the combined chart.
    """
    selection = alt.selection_interval()
    cars_chart = alt.Chart(cars)

    # Left panel: the scatter plot carries the interval selection.
    points = cars_chart.mark_point().encode(
        x=x_axis,
        y=y_axis,
        color="Origin:N",
    )
    points = points.properties(width=250, height=400, selection=selection)

    # Right panel: binned horsepower counts, filtered by the selection.
    horsepower_bins = alt.X("Horsepower:Q", bin=True)
    bars = cars_chart.mark_bar().encode(
        x=horsepower_bins,
        y="count()",
        color="Origin:N",
    )
    bars = bars.transform_filter(selection.ref()).properties(height=375)

    # Write the HTML into an in-memory text buffer and hand back its contents.
    buffer = io.StringIO()
    alt.hconcat(points, bars).save(buffer, "html")
    return buffer.getvalue()
def world_map_for_factors(highlight, dataset, select_year):
    """Build one world map per factor and combine them with ``&``.

    ``highlight`` and ``select_year`` are selections added to every map;
    ``select_year`` also filters each map's rows. ``dataset`` is the list of
    factor names: each one colors its own map and all of them are listed in
    every tooltip.
    """
    country_names_url = "https://raw.githubusercontent.com/KoGor/Map-Icons-Generator/master/data/world-110m-country-names.tsv"

    # NOTE(review): the accumulator is seeded with an empty hconcat but grown
    # with ``&`` (Altair's vertical concatenation) - confirm this is intended.
    stacked = alt.hconcat()
    for factor in dataset:
        fill = alt.condition(
            highlight,
            factor,
            alt.value('lightgrey'),
            scale=alt.Scale(scheme='yelloworangered'),
            title="",
        )
        # First lookup maps the country name to an id, the second maps that
        # id to the fields taken from `countries`.
        names_lookup = alt.LookupData(country_names_url, 'name', ['id', "name"])
        shapes_lookup = alt.LookupData(
            countries, 'id', fields=["id", "type", "properties", "geometry"])

        factor_map = alt.Chart(df).mark_geoshape(stroke='#aaa', strokeWidth=0.25)
        factor_map = factor_map.encode(
            x=alt.X("Country Name"),
            color=fill,
            tooltip=["Country Name"] + dataset,
        )
        factor_map = factor_map.transform_lookup(
            lookup='Country Name', from_=names_lookup)
        factor_map = factor_map.transform_lookup(
            lookup='id', from_=shapes_lookup)
        factor_map = factor_map.project(type="equirectangular")
        factor_map = factor_map.properties(width=500, height=200, title=factor)
        factor_map = factor_map.add_selection(select_year, highlight)
        factor_map = factor_map.transform_filter(select_year)

        stacked &= factor_map

    # Each map keeps its own color scale.
    return stacked.resolve_scale(color='independent')
Esempio n. 26
0
def make_etc_coverage_heatmap(etc_coverage, mag_order=None, module_order=None):
    """Build one coverage heatmap per ETC complex and place them side by side.

    Args:
        etc_coverage: Table grouped by its ``complex`` column; the ``genome``,
            ``module_name`` and ``percent_coverage`` columns drive the axes
            and color, the remaining referenced columns feed the tooltip.
        mag_order: Sort passed to the genome (y) axis.
        module_order: Sort passed to the module (x) axis.

    Returns:
        The horizontally concatenated chart, titled 'ETC Complexes'.
    """
    # Panel height is based on the genomes in the whole table, not per
    # complex, so every panel gets the same height.
    genome_count = len(set(etc_coverage['genome']))

    panels = []
    for etc_complex, frame in etc_coverage.groupby('complex'):
        x_modules = alt.X('module_name',
                          title=None,
                          axis=alt.Axis(labelLimit=0, labelAngle=90),
                          sort=module_order)
        # Genome labels and ticks are hidden on every panel.
        y_genomes = alt.Y('genome',
                          axis=alt.Axis(title=None, labels=False, ticks=False),
                          sort=mag_order)
        hover_fields = [
            alt.Tooltip('genome', title='Genome'),
            alt.Tooltip('module_name', title='Module Name'),
            alt.Tooltip('path_length', title='Module Subunits'),
            alt.Tooltip('path_length_coverage', title='Subunits present'),
            alt.Tooltip('genes', title='Genes present'),
            alt.Tooltip('missing_genes', title='Genes missing'),
        ]

        heatmap = alt.Chart(frame, title=etc_complex)
        heatmap = heatmap.encode(x=x_modules, y=y_genomes, tooltip=hover_fields)
        heatmap = heatmap.mark_rect()
        heatmap = heatmap.encode(
            color=alt.Color('percent_coverage',
                            legend=alt.Legend(title='% Complete'),
                            scale=alt.Scale(domain=(0, 1))))

        # One cell per distinct module across, one per genome down.
        panel_width = HEATMAP_CELL_WIDTH * len(set(frame['module_name']))
        panel_height = HEATMAP_CELL_HEIGHT * genome_count
        panels.append(heatmap.properties(width=panel_width, height=panel_height))

    concat_title = alt.TitleParams('ETC Complexes', anchor='middle')
    return alt.hconcat(*panels, spacing=5, title=concat_title)
def create_ridgeline_plot(data):
    """A function that creates a ridgeline plot for covid_19 CAN & USA dataset.

    Both countries are filtered with the same rule: rows with
    ``new_tests > 0`` whose ``date`` falls in the inclusive window
    ``START_DATE`` .. ``END_DATE``.

    Parameters
    ----------
    data
        input data set from preprocessed csv.

    Returns
    -------
    altair object
        returns the plot as a altair object
    """
    # A single shared filter keeps the two panels on an identical date
    # window. The USA query used a strict ``date > @START_DATE`` while the
    # Canada query used ``>=``, so only the USA panel lost its first day.
    row_filter = "new_tests > 0 and date >= @START_DATE and date <= @END_DATE"

    # The queries stay in this function body so that pandas resolves the
    # ``@``-prefixed names from this module's scope.
    usa = data.query(
        "iso_code == @ISO_CODES['USA'] and " + row_filter
    ).reset_index(drop=True)
    can = data.query(
        "iso_code == @ISO_CODES['CANADA'] and " + row_filter
    ).reset_index(drop=True)

    can_usa_plt = alt.hconcat(
        generate_ridgeline_plot(can, "Canada"),
        generate_ridgeline_plot(usa, "USA"),
        title="COVID-19 Response Ratio - Canada vs USA",
    )

    return can_usa_plt.configure_facet(
        spacing=0).configure_view(stroke=None).configure_title(
            anchor='middle')
Esempio n. 28
0
def _target_count_chart(df, size):
    """Return a bar chart counting the rows of each ``target`` class."""
    target_labels = df.target.unique()
    # Name the index and the counts explicitly instead of renaming whatever
    # ``reset_index`` produces: the default column names of a
    # ``value_counts`` result differ between pandas 1.x and 2.x, and the old
    # rename mapping yields two "count" columns and no "label" on 2.x.
    counts = df.target.value_counts().rename_axis("label").reset_index(
        name="count")
    return alt.Chart(counts).mark_bar(
        stroke="black", strokeWidth=3).encode(
            x=alt.X("label:O", title="target"),
            y=alt.Y("count:Q", title="Count"),
            color=alt.Color(
                "label:O",
                scale=alt.Scale(
                    domain=target_labels,
                    range=get_target_colors(target_labels)),
                legend=None),
            tooltip=["label", "count"]).properties(width=size,
                                                   height=size,
                                                   title="target")


def create_distribution_figure(df, by_class, size=175):
    """
    Create an altair chart for each column in a dataframe. Optionally, plot
    distribution by a target class.

    Columns are laid out three per row. The ``target`` column is drawn as a
    bar chart of class counts; every other column is drawn by
    ``create_grouped_kde``.

    Parameters
    ----------
    df : pandas DataFrame
    by_class : boolean
        Whether to plot data by class
    size : integer
        Size (width & height) of the returned plot

    Returns
    -------
    Altair chart
    """
    n_cols = 3
    plot_rows = alt.vconcat(data=df)

    for start in range(0, len(df.columns), n_cols):
        plot_cols = alt.hconcat()
        for df_col in df.columns[start:start + n_cols]:
            if df_col == "target":
                plot_cols |= _target_count_chart(df, size)
            else:
                plot_cols |= create_grouped_kde(df,
                                                df_col,
                                                "target",
                                                by_class,
                                                size=size)
        plot_rows &= plot_cols

    chart = plot_rows.configure_legend(
        orient="top", titleFontSize=10, labelFontSize=10).configure_title(
            fontSize=14, anchor="middle").configure_axis(grid=False,
                                                         labelAngle=0)
    return chart
Esempio n. 29
0
def _variable_panel(circ, full_pds, tooltip, dtype, encoding_type):
    """Build one scatter panel with a dropdown over the `dtype` variables."""
    subset = full_pds.loc[full_pds['dtype'] == dtype]
    names = [x for x in subset['variable'] if str(x) != 'nan']
    # Start the dropdown on the longest variable name; the sort is stable, so
    # the first one wins on ties.
    initial = sorted(names, key=len, reverse=True)[0]
    dropdown = alt.binding_select(
        options=subset['variable'].unique(), name='variable:')
    selection = alt.selection_single(fields=['variable'],
                                     bind=dropdown,
                                     name='{} variable'.format(dtype),
                                     init={'variable': initial})
    return make_subplot(circ, selection, list(tooltip), encoding_type)


def single_figure(text, o_html, full_pds):
    """Write the PHATE scatter figure for `full_pds` to `o_html`.

    When `full_pds` has a 'variable' column, one panel is built per dtype
    present in its 'dtype' column ('categorical' and/or 'numerical'), each
    with its own variable dropdown; two panels are placed side by side.
    Without a 'variable' column the plain PHATE1/PHATE2 scatter is saved.

    Parameters
    ----------
    text
        Figure title. Attached to the saved chart when non-empty.
    o_html
        Output path handed to the chart's `save`.
    full_pds
        Table with 'PHATE1' and 'PHATE2' columns, optionally 'variable' and
        'dtype'.
    """
    tooltip = ['sample_name', 'PHATE1', 'PHATE2']

    circ = alt.Chart(full_pds).mark_point(size=20).encode(x='PHATE1:Q',
                                                          y='PHATE2:Q')

    panels = []
    if 'variable' in full_pds.columns:
        dtypes_set = set(full_pds['dtype'])
        # Categorical first, so it ends up on the left when both exist.
        for dtype, encoding_type in (('categorical', 'N'), ('numerical', 'Q')):
            if dtype in dtypes_set:
                panels.append(
                    _variable_panel(circ, full_pds, tooltip, dtype,
                                    encoding_type))

    if len(panels) > 1:
        circ = alt.hconcat(*panels)
    elif panels:
        circ = panels[0]

    # The title used to be assembled and then dropped, so `text` never
    # reached the figure. Attach it, skipping empty text so that an empty
    # title is never put into the chart spec.
    if text:
        circ = circ.properties(title={"text": text, "color": "black"})

    circ.save(o_html)
    print('-> Written:', o_html)
def get_interactive_proportions_plot(gender_balance):
    """Return HTML for a linked line chart and bar chart of women's share per job.

    `gender_balance` picks the frame out of `data_frames` and is also spliced
    into the bar chart's title.
    """
    source = data_frames[gender_balance]

    # Multi-selection attached to the bar chart; it filters the line chart
    # and decides which bars keep their job color.
    job_pick = alt.selection(type="multi", encodings=['x'])

    year_axis = alt.X('year:O', title='Year')
    share_axis = alt.Y('female_prop:Q',
                       title="Proportion of Women",
                       axis=alt.Axis(format='%'),
                       scale=alt.Scale(domain=[0, 1]))
    lines = alt.Chart(source).mark_line().encode(
        year_axis, share_axis, alt.Color('job:N', legend=None))
    lines = lines.transform_filter(job_pick).properties(
        width=500, height=375, title="Proportion of Women by Year")

    # Hover selection: nearest x value under the cursor, nothing when idle.
    hover = alt.selection_single(
        encodings=['x'],
        on='mouseover',
        nearest=True,
        empty='none',
    )

    # Vertical guide at the hovered year.
    guide = alt.Chart().mark_rule(color='#aaa').encode(
        x='year:O').transform_filter(hover)
    # Dots are fully transparent unless hovered; this layer owns `hover`.
    dots = lines.mark_circle().encode(
        opacity=alt.condition(hover, alt.value(1), alt.value(0)))
    dots = dots.add_selection(hover)
    # White-stroked copy of the label drawn underneath the plain label.
    label_outline = lines.mark_text(
        align='left', dx=5, dy=-5, stroke='white', strokeWidth=2).encode(
            text=alt.Text('female_prop:Q', format='.2%'))
    label_outline = label_outline.transform_filter(hover)
    label_text = lines.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.Text('female_prop:Q', format='.2%'))
    label_text = label_text.transform_filter(hover)

    line_panel = alt.layer(
        lines, guide, dots, label_outline, label_text, data=source)

    job_axis = alt.Y('job:N',
                     title='',
                     sort=alt.EncodingSortField(field="total_prop_female",
                                                op="sum",
                                                order="descending"))
    total_axis = alt.X('total_prop_female:Q',
                       title="Proportion of Women",
                       axis=alt.Axis(format='%'))
    bar_color = alt.condition(job_pick,
                              alt.Color('job:N', legend=None),
                              alt.ColorValue("grey"))
    bar_title = ("Jobs by Proportion of Women (For the 10 most " +
                 gender_balance + " jobs)")
    bar_panel = alt.Chart(source).mark_bar(size=30).encode(
        y=job_axis, x=total_axis, color=bar_color)
    bar_panel = bar_panel.properties(
        width=250, height=375, title=bar_title).add_selection(job_pick)

    combined = alt.hconcat(line_panel, bar_panel)
    combined = combined.resolve_legend(color="independent", size="independent")
    combined = combined.configure_axis(labelFontSize=13, titleFontSize=14)

    # Write the HTML into an in-memory text buffer and hand back its contents.
    buffer = io.StringIO()
    combined.save(buffer, 'html')
    return buffer.getvalue()
Esempio n. 31
0
    x='x',
    y='y'
).transform_filter(
    pts.ref()
).properties(
    width=300,
    height=300
)

# right panel: histogram
# Bar chart counting rows per "mbin" value. Bars inside the `pts` selection
# are drawn black and the rest light gray; `pts` is attached to this chart
# through `selection=pts`. (`pts` is defined above this excerpt.)
mag = alt.Chart().mark_bar().encode(
    x='mbin:N',
    y="count()",
    color=alt.condition(pts, alt.value("black"), alt.value("lightgray"))
).properties(
    selection=pts,
    width=300,
    height=300
)

# build the chart:
# `points` and `mag` are placed side by side over the shared `source` data,
# and the "mbin" field both charts' parent provides is derived by binning
# field "m" with at most 20 bins.
alt.hconcat(
    points,
    mag,
    data=source
).transform_bin(
    "mbin",
    field="m",
    bin=alt.Bin(maxbins=20)
)
Esempio n. 32
0
"""
World Projections
-----------------
This example shows a map of the countries of the world using four available
geographic projections. For more details on the projections available in
Altair, see https://vega.github.io/vega-lite/docs/projection.html
"""
# category: maps
import altair as alt
from vega_datasets import data

# 'countries' feature of the world_110m dataset, referenced by URL.
source = alt.topo_feature(data.world_110m.url, 'countries')

# Shared map styling and size; each panel below only changes the projection.
base = (
    alt.Chart(source)
    .mark_geoshape(fill='#666666', stroke='white')
    .properties(width=300, height=180)
)

projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic']
# One chart per projection, titled with the projection's name.
charts = list(map(
    lambda proj: base.project(proj).properties(title=proj),
    projections,
))

# Arrange the four maps as two rows of two.
alt.vconcat(alt.hconcat(*charts[:2]), alt.hconcat(*charts[2:]))