Ejemplo n.º 1
0
def scatter(x,
            y,
            s=None,
            color=None,
            style=None,
            size_by=None,
            sizes=None,
            x_type="quantitative",
            color_type="nominal",
            size_type="quantitative",
            data=None,
            palette=None,
            saturation=1,
            size=None,
            aspect=1):
    """Build a filled-point scatter chart with optional color/shape/size channels.

    Args:
        x, y: Field names in ``data``. When ``data`` is None they are handed
            to ``build_dataframe`` and replaced by the field names it returns.
        s: Accepted but not used by this function.
        color: An ``alt.Color`` (used as is), a column name of ``data``
            (encoded with ``color_type``), or any other value, which is
            passed through ``vega_color`` and used as a constant.
        style: A column name of ``data`` (nominal shape encoding) or a
            constant shape value.
        size_by: A column name of ``data`` (encoded with ``size_type``) or a
            constant size value.
        sizes: Optional scale range, applied only when ``size_by`` is a column.
        x_type: Encoding type of the x channel; y is always quantitative.
        color_type: Encoding type for a color column; also selects which
            range ("category" for nominal, "ramp" otherwise) gets the palette.
        size_type: Encoding type for a size column.
        data: Table to plot; its ``columns`` are consulted for the checks above.
        palette, saturation: Forwarded to ``vega_palette``.
        size, aspect: Forwarded to ``size_chart``.

    Returns:
        The chart with the palette configured as its color range.
    """
    if data is None:
        data, fields = build_dataframe({"x": x, "y": y})
        x = fields["x"]
        y = fields["y"]

    def is_column(name):
        # Evaluated lazily so ``data.columns`` is only touched when an
        # optional channel was actually requested.
        return name in list(data.columns)

    channels = dict(
        x=alt.X(field=x, type=x_type, axis={"title": x}),
        y=alt.Y(field=y, type="quantitative", axis={"title": y}),
    )

    if color:
        if isinstance(color, alt.Color):
            channels["color"] = color
        elif is_column(color):
            channels["color"] = alt.Color(field=color, type=color_type)
        else:
            channels["color"] = alt.Color(value=vega_color(color))

    if style:
        if is_column(style):
            channels["shape"] = alt.Shape(field=style, type="nominal")
        else:
            channels["shape"] = alt.Shape(value=style)

    if size_by:
        if is_column(size_by):
            size_channel = {"field": size_by, "type": size_type}
            if sizes:
                size_channel["scale"] = {"range": sizes}
        else:
            size_channel = {"value": size_by}
        channels["size"] = size_channel

    chart = alt.Chart(data).mark_point(filled=True).encode(**channels)
    # NOTE(review): the return value is ignored, so size_chart presumably
    # adjusts the chart in place -- confirm against its definition.
    size_chart(chart, size, aspect)

    pal = vega_palette(palette, None, saturation, vega_type=color_type)
    range_name = "category" if color_type == "nominal" else "ramp"
    return chart.configure_range(**{range_name: pal})
Ejemplo n.º 2
0
def plot_scatter_alt(
    self,
    color='run:Q',
    color_scheme='purplebluegreen',
    shape='method:N',
    width=400,
    height=400,
):
    """Plot the trials' total flops against max size as an Altair scatter.

    The data comes from ``self.to_df()``; the chart reads its ``size``,
    ``flops`` and ``random_strength`` columns and shows every column in the
    tooltip.

    Args:
        color: Altair shorthand for the color channel.
        color_scheme: Name of the color scheme applied to the color scale.
        shape: Altair shorthand for the shape channel.
        width, height: Chart dimensions.

    Returns:
        The interactive chart.
    """
    import altair as alt

    trials = self.to_df()

    # Neither axis is forced to include zero.
    x_channel = alt.X(
        'size:Q', title='log2[SIZE]', scale=alt.Scale(zero=False))
    y_channel = alt.Y(
        'flops:Q', title='log10[FLOPS]', scale=alt.Scale(zero=False))
    # Marker size follows random_strength on a log scale, without a legend.
    size_channel = alt.Size(
        'random_strength:Q',
        scale=alt.Scale(range=[50, 150], type='log'),
        legend=None,
    )
    color_channel = alt.Color(color, scale=alt.Scale(scheme=color_scheme))

    chart = alt.Chart(trials).mark_point().encode(
        x=x_channel,
        y=y_channel,
        size=size_channel,
        shape=alt.Shape(shape),
        color=color_channel,
        tooltip=list(trials.columns),
    )
    chart = chart.properties(width=width, height=height)
    chart = chart.configure_axis(gridColor='rgb(248, 248, 248)')
    return chart.interactive()
Ejemplo n.º 3
0
def _make_volume_comparison_plot(df):
    """Render weekly up/down traffic, split by ``is_health``, to a PNG file.

    Args:
        df: Table with ``day``, ``is_health``, ``bytes_up`` and
            ``bytes_down`` columns.

    The chart is saved to ``renders/health_domain_overall_magnitude.png``;
    nothing is returned.
    """
    # Consolidate by week instead of by day
    weekly_bins = [pd.Grouper(key="day", freq="W-MON"), "is_health"]
    weekly = df[["day", "is_health", "bytes_up", "bytes_down"]]
    weekly = weekly.groupby(weekly_bins).sum().reset_index()

    # One row per (week, is_health, direction).
    long_form = weekly.melt(
        id_vars=["is_health", "day"],
        value_vars=["bytes_up", "bytes_down"],
        var_name="direction",
        value_name="bytes",
    )

    # Bytes -> GB, matching the axis title below.
    long_form["bytes"] = long_form["bytes"] / 1000**3

    traffic_axis = alt.Y(
        "bytes:Q",
        title="Weekly Traffic(GB)",
        axis=alt.Axis(labels=True),
        scale=alt.Scale(type="symlog"),
    )
    chart = alt.Chart(long_form).mark_point(opacity=1.0).encode(
        x=alt.X("day:T"),
        y=traffic_axis,
        color=alt.Color("direction", title="Type"),
        shape=alt.Shape("is_health:N"),
    )
    chart.properties(width=500).save(
        "renders/health_domain_overall_magnitude.png",
        scale_factor=2,
    )
Ejemplo n.º 4
0
def graph_transit_filtre(df_transit, date_debut, date_fin, o_d):
    """
    Chart the travel times of one origin-destination pair, separating transit
    trips from the others.

    Args:
        df_transit: DataFrame of origin-destination trips; the columns
            'date_cam_1', 'tps_parcours', 'filtre_tps', 'type' and 'o_d'
            are read.
        date_debut: Start of the window, a string such as
            '2019-01-28 00:00:00' (the bound itself is excluded).
        date_fin: End of the window, same format (the bound is excluded).
        o_d: Origin-destination label, one of the values present in
            df_transit['o_d'].

    Returns:
        An interactive Altair chart where color tells transit from local
        trips and shape shows the source of the reference travel time.
    """
    debut = pd.to_datetime(date_debut)
    fin = pd.to_datetime(date_fin)
    titre = (debut.day_name(locale='French') + ' '
             + debut.strftime('%Y-%m-%d') + ' : ' + o_d)

    dans_fenetre = ((df_transit['date_cam_1'] > debut) &
                    (df_transit['date_cam_1'] < fin) &
                    (df_transit['o_d'] == o_d))
    copie_df = df_transit.loc[
        dans_fenetre,
        ['date_cam_1', 'tps_parcours', 'filtre_tps', 'type']].copy()

    # Anchor the durations on an arbitrary date so they can be drawn on a
    # temporal axis formatted as %H:%M.
    copie_df['tps_parcours'] = (pd.to_datetime('2018-01-01')
                                + copie_df['tps_parcours'])

    # Element-wise relabelling. The previous row-wise DataFrame.apply was
    # wrapped in `except ValueError: pass` (presumably to cope with an empty
    # selection), which hid any other failure too; Series.map needs no such
    # guard.
    copie_df['filtre_tps'] = copie_df['filtre_tps'].map(
        lambda est_transit: 'Transit' if est_transit else 'Local')
    copie_df['type'] = copie_df['type'].map(
        lambda source: 'Reglementaire' if source == '85eme_percentile'
        else source)

    graph_filtre_tps = alt.Chart(copie_df, title=titre).mark_point().encode(
        x=alt.X('date_cam_1:T',
                axis=alt.Axis(title='Horaire', format='%Hh%M')),
        y=alt.Y('tps_parcours:T',
                axis=alt.Axis(title='Temps de parcours', format='%H:%M')),
        tooltip='hoursminutes(tps_parcours)',
        color=alt.Color('filtre_tps:N',
                        legend=alt.Legend(title="Type de trajet")),
        shape=alt.Shape('type:N',
                        legend=alt.Legend(title="Source temps de reference")),
    ).interactive().properties(width=800, height=400)
    return graph_filtre_tps
Ejemplo n.º 5
0
def visualize_clusters(df, n_clusters, range_):
    """Show the clustered points as an interactive chart in Streamlit.

    Args:
        df: Table whose index is reset before plotting; the chart reads the
            'Component 1', 'Component 2', 'playlist', 'Cluster', 'name' and
            'artist' fields.
        n_clusters: Number of clusters; the color domain is 0..n_clusters-1.
        range_: Color range paired with that domain.
    """
    marker_shapes = [
        "circle", "diamond", "square", "triangle-down", "triangle-up"
    ]
    cluster_ids = list(range(n_clusters))

    points = alt.Chart(df.reset_index()).mark_point(filled=True, size=60)
    graph = points.encode(
        x=alt.X('Component 2'),
        y=alt.Y('Component 1'),
        shape=alt.Shape('playlist', scale=alt.Scale(range=marker_shapes)),
        color=alt.Color('Cluster',
                        scale=alt.Scale(domain=cluster_ids, range=range_)),
        tooltip=['name', 'artist'],
    ).interactive()

    st.altair_chart(graph, use_container_width=True)
Ejemplo n.º 6
0
def plot(data, phase, field, center=False, scale=None, shape=(400, 400)):
    """Plot ``<phase>_<field>`` against temperature as an interactive chart.

    Args:
        data: pandas DataFrame to plot. The encodings read the columns
            ``temp``, ``<phase>_<field>``, ``distillation`` and ``student``
            plus the tooltip columns listed in ``detalle``. It is never
            modified: centering works on a copy.
        phase: Prefix of the plotted column. The built-in reference
            accuracies exist only for ``'train'`` and ``'test'``.
        field: ``'acc'``, ``'eval'`` or ``'loss'`` (keys of the axis-title
            table).
        center: Only used when ``field == 'acc'``. If true, each student's
            reference accuracy is subtracted from ``<phase>_acc``; otherwise
            the reference accuracies are overlaid as horizontal rules.
        scale: y-scale type. ``None`` selects ``'log'`` for ``'loss'`` and
            ``'eval'`` and ``'linear'`` for anything else.
        shape: ``(width, height)`` of the chart.

    Returns:
        The point chart, or the rules layered with the point chart when
        ``field == 'acc'`` and ``center`` is false.

    Raises:
        KeyError: If ``field`` has no axis title, or when centering meets a
            student or phase without a reference accuracy.
    """
    detalle = [
        'test_acc', 'test_teacher/acc', 'test_loss', 'test_eval', 'train_acc',
        'train_teacher/acc', 'train_loss', 'train_eval', 'distillation', 'temp'
    ]

    field_dict = {
        'acc': "Accuracy [%]",
        'eval': "Perdida Cross Entropy",
        'loss': "Perdida de Destilación"
    }

    # Reference accuracies used for centering or for the overlay rules.
    accs = {
        'Model': ['MobileNet', 'ResNet18', 'ResNet101'],
        'ce_train': [95.73, 98.15, 98.52],
        'ce_test': [87.8, 90.58, 90.68]
    }

    # Console traces kept as they were so existing output does not change.
    if scale is None:
        print(".")
        scale = 'log' if field in ['loss', 'eval'] else 'linear'
    else:
        print("scale", scale)

    print("lasorra")

    if field == 'acc' and center:
        # Center on a private copy. The previous in-place `-=` altered the
        # caller's frame, so calling plot() twice subtracted the reference
        # twice.
        reference = dict(zip(accs['Model'], accs['ce_%s' % phase]))
        column = '%s_acc' % phase
        data = data.copy()
        data[column] = data[column] - [
            reference[student] for student in data['student']
        ]

    bar = alt.Chart(data).mark_point().encode(
        alt.X('temp:O',
              scale=alt.Scale(
                  zero=False,
                  base=10,
                  type='log',
              ),
              title="Temperatura"),
        alt.Y('%s_%s' % (phase, field),
              scale=alt.Scale(zero=False, type=scale),
              title=field_dict[field]),
        shape=alt.Shape('distillation',
                        legend=alt.Legend(title="Destilación")),
        color=alt.Color('student', legend=alt.Legend(title="Modelo")),
        size=alt.value(50),
        tooltip=detalle).interactive()

    if field == 'acc' and not center:
        aggregates = alt.Chart(pd.DataFrame(accs)).mark_rule(
            opacity=0.5).encode(y='ce_%s:Q' % phase,
                                color='Model:N',
                                size=alt.value(2))
        return (aggregates + bar).properties(width=shape[0],
                                             height=shape[1])

    return bar.properties(width=shape[0], height=shape[1])
Ejemplo n.º 7
0
def senti_circleplot(senti_df, student_id):
    """Return an interactive point chart of sentiment per student.

    Args:
        senti_df: Table to plot.
        student_id: Field used for the x axis and shown as "author" in the
            tooltip.

    The y axis uses ``cts.SENTI``; color and shape both follow
    ``cts.ASSIGNMENT``, with only the color legend shown (on the left).
    """
    hover = [
        alt.Tooltip(cts.SENTI, title="polarity"),
        alt.Tooltip(student_id, title="author"),
    ]
    points = alt.Chart(senti_df).mark_point(size=100, fillOpacity=0.7)
    encoded = points.encode(
        alt.X(student_id),
        alt.Y(cts.SENTI),
        alt.Color(cts.ASSIGNMENT, legend=alt.Legend(orient="left")),
        alt.Shape(cts.ASSIGNMENT, legend=None),
        tooltip=hover,
    )
    return encoded.interactive()
Ejemplo n.º 8
0
def area_chart_f(df_covid19_region, region_title, x_option, y_vars, stack,
                 solve_y_scale, order_var, *y_line_var):
    """Build a faceted area chart, optionally overlaid with a black line.

    Args:
        df_covid19_region: Table to plot.
        region_title: Field used (as nominal) to facet the chart, 3 columns
            wide.
        x_option: Passed to ``set_x_axis``, which supplies the x field name
            and the x encoding.
        y_vars: Fields folded into ``key``/``value`` pairs and drawn as areas.
        stack: Stack setting of the area y channel.
        solve_y_scale: Truthy gives each facet an independent y scale,
            otherwise the scale is shared.
        order_var: A dict is turned into a calculated ``order`` field used
            for ordering; any other value orders the areas by ``key``.
        *y_line_var: Optional; the first entry names a quantitative field
            drawn as a line on top of the areas.

    Returns:
        The faceted chart.
    """
    x_var, x_axis = set_x_axis(x_option)

    base = alt.Chart(df_covid19_region).encode(x=x_axis).properties(
        height=150, width=180)

    key_colors = alt.Color(
        'key:N',
        scale=alt.Scale(range=['#1f77b4', '#e41a1c', '#71f594']),
        legend=alt.Legend(title=None),
    )
    area = base.mark_area(opacity=0.7, line=True).transform_fold(y_vars)
    area = area.encode(
        y=alt.Y('value:Q', stack=stack, axis=alt.Axis(title='count')),
        color=key_colors,
        tooltip=[x_var, 'value:Q'],
    )

    # Ordering: a dict supplies a custom order expression, anything else
    # falls back to ordering by the folded key.
    if isinstance(order_var, dict):
        area = area.transform_calculate(order=str(order_var))
        area = area.encode(order="order:Q")
    else:
        area = area.encode(order="key:N")

    # Optional overlay line.
    if y_line_var:
        overlay = base.mark_line().encode(
            alt.Y(y_line_var[0] + ":Q", axis=alt.Axis(title='count')),
            color=alt.value('black'),
            size=alt.value(2),
            shape=alt.Shape('Active_cases_label',
                            legend=alt.Legend(title=None)),
        )
        area = alt.layer(area, overlay)

    # Facet construction
    y_resolution = "independent" if solve_y_scale else "shared"
    faceted = area.facet(facet=region_title + ':N', columns=3)
    return faceted.resolve_scale(y=y_resolution)
Ejemplo n.º 9
0
def ScatterPlot(    dataframe, 
                    x,
                    y,
                    color_by, 
                    marker_by, 
                    size_by,
                    marker_color,
                    marker_shape,
                    marker_size, 
                    marker_thickness,
                    xlabel = None,
                    ylabel = None
                ):
    """Build a point chart whose color, shape and size are data-driven or fixed.

    Args:
        dataframe: Table to plot.
        x, y: Fields for the two axes.
        color_by: Field for an ordinal color encoding, or None to use the
            constant ``marker_color``.
        marker_by: Field for an ordinal shape encoding, or None to use the
            constant ``marker_shape``.
        size_by: Field for a quantitative size encoding, or None to use the
            constant ``marker_size``.
        marker_thickness: Constant stroke width of the markers.
        xlabel, ylabel: Axis titles; default to ``x`` and ``y``.

    Returns:
        The chart.
    """
    if color_by is None:
        color_channel = alt.value(marker_color)
    else:
        color_channel = alt.Color(color_by, type = 'ordinal',
                                  scale=alt.Scale(range = 'category'))

    if marker_by is None:
        shape_channel = alt.value(marker_shape)
    else:
        shape_channel = alt.Shape(marker_by, type = 'ordinal')

    if size_by is None:
        size_channel = alt.value(marker_size)
    else:
        size_channel = alt.Size(size_by, type = 'quantitative')

    # Axis titles fall back to the field names.
    x_title = x if xlabel is None else xlabel
    y_title = y if ylabel is None else ylabel

    return alt.Chart(dataframe).mark_point().encode(
        x = alt.X(x, title=x_title),
        y = alt.Y(y, title=y_title),
        shape = shape_channel,
        color = color_channel,
        size = size_channel,
        strokeWidth = alt.value(marker_thickness)
    )
Ejemplo n.º 10
0
def grafico_con_columnas(
        source,
        y='test_acc',
        title="Exactitud en validacion desacoplado segun destilacion",
        shape='student',
        column='feat_dist',
        color='layer',
        fill=None):
    """Build a column-faceted point chart from ``source``.

    Args:
        source: pandas DataFrame; every column not named by ``y``, ``shape``,
            ``column``, ``color`` or ``fill`` is dropped before plotting.
        y: Field for the y axis.
        title: Chart title.
        shape: Field for the (ordinal) shape channel.
        column: Field for the (ordinal) column facet.
        color: Field used both for the x axis and the color channel (nominal).
        fill: Optional field for an ordinal fill channel using the 'pastel1'
            scheme.

    Titles and legend titles are looked up in ``global_titles`` by field name.

    Returns:
        The interactive chart, 70 wide by 600 high.
    """
    # Reduce the data to the fields the chart actually uses.
    kept_fields = (y, shape, column, color, fill)
    unused = [name for name in source.columns if name not in kept_fields]
    source = source.drop(columns=unused)

    def titled_legend(field):
        return alt.Legend(title=global_titles[field])

    encodings = {}
    encodings["shape"] = alt.Shape("%s:O" % shape,
                                   legend=titled_legend(shape))
    encodings["y"] = alt.Y(y, title=global_titles[y])
    encodings["column"] = alt.Column('%s:O' % column,
                                     title=global_titles[column])
    encodings["x"] = alt.X('%s:N' % color, title=global_titles[color])
    encodings["color"] = alt.Color('%s:N' % color,
                                   legend=titled_legend(color))
    encodings["opacity"] = alt.value(0.5)

    if fill is not None:
        encodings["fill"] = alt.Fill('%s:O' % fill,
                                     legend=titled_legend(fill),
                                     scale=alt.Scale(scheme='pastel1'))

    chart = alt.Chart(source, title=title).mark_point(size=100)
    chart = chart.encode(**encodings)
    chart = chart.configure_axis(titleFontSize=12, labelFontSize=12)
    chart = chart.configure_title(fontSize=15).interactive()
    return chart.properties(width=70, height=600)
def plotter(v_x, v_y, fptp_winner, approval_winner, borda_winner,
            score_winner):
    """Facet voters and candidates by voting system.

    Args:
        v_x, v_y: Voter coordinates.
        fptp_winner, approval_winner, borda_winner, score_winner: Winner
            label per voter under each system; the labels are colored with
            the Draw/Red/Blue/Green scale.

    Candidates come from the module-level ``CAND_DICT`` (fields ``C_X``,
    ``C_Y`` and ``name`` are read).

    Returns:
        A chart with one column per system: voter circles with the candidate
        markers layered on top.
    """
    systems = ["fptp_winner", "approval_winner", "borda_winner",
               "score_winner"]
    winner_colors = alt.Scale(
        domain=["Draw", "Red", "Blue", "Green"],
        range=["#6b6b6b", "red", "#12a9e5", "#12e551"],
    )

    # Voters in long form: one row per (voter, system).
    voters = pd.DataFrame({
        "v_x": v_x,
        "v_y": v_y,
        "fptp_winner": fptp_winner,
        "approval_winner": approval_winner,
        "borda_winner": borda_winner,
        "score_winner": score_winner
    })
    combined = voters.melt(id_vars=["v_x", "v_y"],
                           var_name="SYSTEM",
                           value_name="winner")

    # Candidates repeated four times and tagged with a system label.
    # NOTE(review): `systems * 3` has 12 entries, so this only fits when
    # CAND_DICT describes exactly three candidates -- confirm.
    candidates = pd.DataFrame(CAND_DICT)
    candidates = pd.concat([candidates] * 4, sort=False, ignore_index=True)
    candidates["SYSTEM"] = systems * 3
    combined = pd.concat([combined, candidates],
                         sort=False,
                         ignore_index=True)

    candidate_plot = alt.Chart().mark_point(filled=True).encode(
        alt.X('C_X', title="x"),
        alt.Y('C_Y', title="y"),
        alt.Color('name', legend=None, scale=winner_colors),
        alt.Shape('name', legend=alt.Legend(title="Candidates")),
        alt.StrokeValue("black"),
        opacity=alt.value(1),
        size=alt.value(150),
    )

    voter_plot = alt.Chart().mark_circle().encode(
        x='v_x',
        y='v_y',
        color=alt.Color('winner:N', legend=None, scale=winner_colors),
    ).properties(width=200)

    layered = alt.layer(voter_plot, candidate_plot, data=combined)
    return layered.facet(column='SYSTEM:N').configure_header(
        labelFontSize=20)
Ejemplo n.º 12
0
def createCholorpeth(df):
    """Build a US state choropleth colored by ``normPositive``.

    Args:
        df: Table with an ``id`` field used to look up the state geometry,
            plus ``normPositive``, ``state``, ``positive`` and ``death``.

    Returns:
        An 800x600 geoshape chart in the albersUsa projection.
    """
    # State outlines; `data` is presumably vega_datasets.data -- confirm.
    states = alt.topo_feature(data.us_10m.url, 'states')

    case_legend = alt.Legend(
        title=["Confirmed Cases", "per 100,000 People", ""])
    hover = [
        alt.Tooltip('state:N', title='State'),
        alt.Tooltip('positive:N', title='Confirmed Cases'),
        alt.Tooltip('death:N', title='Confirmed Deaths')
    ]

    chart = alt.Chart(df).mark_geoshape().encode(
        shape=alt.Shape(field='geo', type='geojson'),
        color=alt.Color('normPositive:Q',
                        scale=alt.Scale(scheme='reds'),
                        legend=case_legend),
        tooltip=hover,
    )
    # Attach each row's geometry as the `geo` field drawn above.
    chart = chart.transform_lookup(
        lookup='id',
        from_=alt.LookupData(data=states, key='id'),
        as_='geo',
    )
    return chart.properties(width=800, height=600).project(type='albersUsa')
Ejemplo n.º 13
0
def shape_plot(df, single_select):
    """Plot total CO2 emissions against emissions per capita.

    Args:
        df: Table with 'CO2 emissions (kt)', 'CO2 emissions per capita' and
            'Country Name' fields.
        single_select: Selection driving the color condition: rows matching
            it are colored by country, the rest are light grey.

    Returns:
        The circle chart. A text-label layer is also built but is currently
        not layered into the result.
    """
    country_color = alt.condition(single_select,
                                  'Country Name:N',
                                  alt.value('lightgrey'),
                                  scale=alt.Scale(scheme="tableau10"))

    circles = alt.Chart(df).mark_circle(opacity=0.35).encode(
        alt.X('CO2 emissions (kt):Q'),
        alt.Y('CO2 emissions per capita:Q'),
        color=country_color,
        shape=alt.Shape('Country Name:N', legend=None),
        size=alt.value(250),
    )
    circles = circles.properties(
        width=300,
        height=250,
        title='CO2 total emissions and emissions per capita')

    # Built but intentionally left out of the returned chart.
    labels = circles.mark_text(align='center', baseline='middle', dy=-25)
    labels = labels.encode(text='Country Name', size=alt.value(15))

    return circles
Ejemplo n.º 14
0
def graph_delivery_dates(df):
    """Save a line-plus-points chart of ``working_day`` per month to chart.png.

    Args:
        df: Table with ``calendar_date``, ``working_day`` and ``name`` fields.

    Enables the "opaque" Altair theme as a side effect; returns nothing.
    """
    alt.themes.enable("opaque")

    # Constructed but never passed to a chart; the points use the plain
    # "name" shorthand for their shape instead.
    shapes = alt.Shape("name:N")

    legend_colors = alt.Color("name:N", legend=alt.Legend(title="Something"))

    # Lines carry no legend of their own.
    line = alt.Chart(df).mark_line().encode(
        x="yearmonth(calendar_date):N",
        y="working_day:Q",
        color=alt.Color("name", legend=None),
    )

    # Points reuse the line encodings and own the legend.
    points = line.mark_point(filled=True, size=80, opacity=1)
    points = points.encode(color=legend_colors, shape="name")

    layered = alt.layer(line, points)
    layered = layered.resolve_scale(color="independent", shape="independent")
    layered.save("chart.png")
Ejemplo n.º 15
0
def plot_multi_relaxations(
    df: pandas.DataFrame,
    prop: List[str],
    title: str = "Relaxation Values",
    replace: Dict[str, str] = None,
) -> alt.Chart:
    """Helper to plot relaxation quantities using Altair.

    Args:
        df: DataFrame in long form. The columns ``variable``,
            ``inv_temp_norm``, ``pressure``, ``mean`` and ``sem`` are read.
        prop: The property, or list of properties, to plot; rows are kept
            when their ``variable`` value is in this list.
        title: Custom title used to label the y axis.
        replace: Optional mapping passed to ``DataFrame.replace`` to rename
            values before plotting. ``None`` (the default) leaves the data
            untouched.

    Returns:
        An error-bar layer plus a point layer sharing the same encodings.

    """
    if isinstance(prop, str):
        prop = [prop]

    axis_format = "e"

    selected = df.query("variable in @prop")
    # Only call DataFrame.replace when there is something to replace:
    # passing None as `to_replace` is deprecated in pandas and falls back to
    # a pad-fill instead of being a no-op.
    if replace is not None:
        selected = selected.replace(replace)

    relax_chart_base = alt.Chart(selected).encode(
        x=alt.X("inv_temp_norm:Q", title="Tm/T", axis=alt.Axis(format="g")),
        color=alt.Color("pressure:N", title="Pressure"),
        shape=alt.Shape("variable", title="Relaxation"),
        y=alt.Y(
            "mean:Q",
            title=title,
            scale=alt.Scale(type="log"),
            axis=alt.Axis(format=axis_format),
        ),
        yError=alt.YError("sem:Q"),
    )

    return relax_chart_base.mark_errorbar() + relax_chart_base.mark_point()
Ejemplo n.º 16
0
    def make_graph(self):
        """Rebuild the chart from the current selections and push it to the page.

        Does nothing beyond the initial size request when the x field, the
        y field or the mark type has not been chosen yet. Otherwise the
        selected columns are encoded (y2, shape and color only when set) and
        the rendered chart is written to the ``srcdoc`` attribute of the
        element identified by ``self.graph_id``.
        """
        debug('new graph')
        get_element_size('graph_container', self.set_size)

        nothing_to_draw = (self.xlabel is None or self.ylabel is None
                           or self.mark is None)
        if nothing_to_draw:
            return

        # Each mark option holds the mark name (used when guessing data
        # types) followed by the callable that applies the mark to a chart.
        mark_option = self.mark_options[self.mark]
        mark = mark_option[0]

        x_field = self.columns[self.xlabel]
        y_field = self.columns[self.ylabel]
        encodings = {}
        encodings['x'] = alt.X(x_field,
                               type=self.guess_datatype(x_field, mark))
        encodings['y'] = alt.Y(y_field,
                               type=self.guess_datatype(y_field, mark))

        if self.y2label is not None:
            encodings['y2'] = alt.Y2(self.columns[self.y2label])

        if self.shape is not None:
            shape_field = self.columns[self.shape]
            encodings['shape'] = alt.Shape(
                shape_field, type=self.guess_datatype(shape_field, mark))

        if self.colors is not None:
            color_field = self.columns[self.colors]
            encodings['color'] = alt.Color(
                color_field, type=self.guess_datatype(color_field, mark))

        apply_mark = mark_option[1]
        chart = apply_mark(alt.Chart(self.data)).encode(**encodings)
        chart = chart.interactive().properties(width=self.width,
                                               height=self.height)

        rendered = html.altair_plot(chart, with_iframe=False)
        set_attribute(self.graph_id, 'srcdoc', rendered)
Ejemplo n.º 17
0
def make_chart(data, mark_type='point', mark_props=None,
               x_title='x', x_props=None,
               y_title='y', y_props=None,
               color='x:N', shape='x:N',
               text_col=None, text_props=None,
               other_axes_and_channels=None, interactive=True,
               scale_factor=1.0, svg=False,
               save_file='chart.html', on_save=None):
    """Build an Altair chart from keyword options and optionally save it.

    Args:
        data: Data handed to ``alt.Chart``.
        mark_type: Suffix of the ``mark_*`` method to use (e.g. ``'point'``).
        mark_props: Keyword arguments for that mark method.
        x_title, y_title: Shorthand for the x / y channel; a falsy value
            passes ``None`` for that channel.
        x_props, y_props: Extra keyword arguments for ``alt.X`` / ``alt.Y``.
            A ``'scale'`` entry is interpreted as the scale domain.
        color, shape: A shorthand string, or a dict of keyword arguments for
            ``alt.Color`` / ``alt.Shape``.
        text_col: If set, a text layer showing this field is added and the
            chart height is set to 900.
        text_props: Keyword arguments for the text mark.
        other_axes_and_channels: Additional encode() keyword arguments.
        interactive: Make the chart interactive.
        scale_factor: Passed to ``chart.save``.
        svg: Save with the SVG renderer embed option.
        save_file: Output path; a falsy value skips saving.
        on_save: Optional callback invoked with ``save_file`` after saving.

    Returns:
        The chart that was built (previously nothing was returned).
    """
    # Private copies: the defaults used to be shared mutable dicts, and the
    # 'scale' rewrite below used to modify the caller's x_props / y_props,
    # which broke a second call with the same dict.
    mark_props = dict(mark_props or {})
    x_props = dict(x_props or {})
    y_props = dict(y_props or {})
    text_props = dict(text_props or {})
    other_axes_and_channels = dict(other_axes_and_channels or {})

    chart = getattr(alt.Chart(data), f"mark_{mark_type}")(**mark_props)

    if 'scale' in x_props:
        x_props['scale'] = alt.Scale(domain=x_props['scale'])
    if 'scale' in y_props:
        y_props['scale'] = alt.Scale(domain=y_props['scale'])

    chart = chart.encode(
        x=alt.X(x_title, **x_props) if x_title else None,
        y=alt.Y(y_title, **y_props) if y_title else None,
        color=color if isinstance(color, str) else alt.Color(**color),
        shape=shape if isinstance(shape, str) else alt.Shape(**shape),
        **other_axes_and_channels)

    if text_col:
        text_chart = chart.mark_text(**text_props).encode(text=text_col)
        chart = (chart + text_chart)
        # properties() returns a new chart; the result used to be discarded,
        # so the height was never applied.
        chart = chart.properties(height=900)

    if interactive:
        chart = chart.interactive()

    if save_file:
        save_options = {'scale_factor': scale_factor}
        if svg:
            save_options['embed_options'] = {'renderer': 'svg'}
        chart.save(save_file, **save_options)
        if on_save:
            on_save(save_file)

    return chart
Ejemplo n.º 18
0
def people_plot(df, select):
    """Draw a grid of person icons colored by income group.

    Args:
        df: Input passed to ``Par_income_pre``, whose result is plotted. The
            chart reads its ``index`` and ``data`` fields.
        select: Selection added to the chart and also used to filter it.

    Returns:
        The icon chart.
    """
    people = Par_income_pre(df)

    # SVG path of the person icon used for every category.
    person_img = 'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z'

    domains = [
        "par_q1", "par_q2", "par_q3", "par_q4", "par_q5", "par_top10pc",
        "par_top1pc", 'par_toppt1pc'
    ]
    palette = [
        '#4c78a8', '#f58518', '#e45756', '#72b7b2',
        '#54a24b', '#54a200', '#00a235', '#45ff00'
    ]

    # Same icon for all eight categories; only the color tells them apart.
    shape_scale = alt.Scale(domain=domains,
                            range=[person_img] * len(domains))
    color_scale = alt.Scale(domain=domains, range=palette)

    # Lay the icons out on a grid derived from the row index.
    grid = alt.Chart(people).transform_calculate(row="ceil(datum.index/10)")
    grid = grid.transform_calculate(col="datum.index-datum.row*10")

    icons = grid.mark_point(filled=True, opacity=1, size=50).encode(
        alt.X('col:O', axis=None),
        alt.Y("row:O", axis=None),
        alt.Shape("data:N", legend=None, scale=shape_scale),
        alt.Color("data:N", legend=None, scale=color_scale),
    )

    return icons.add_selection(select).transform_filter(select)
Ejemplo n.º 19
0
def render_curve(df, ns=None, epsilons=None, save_path=None):
    """Render, and optionally save, a plot of the loss-data curve.
    Optionally takes arguments `ns` and `epsilons` to draw lines on the plot
    illustrating where metrics were calculated.
    Arguments:
    - df: (pd.DataFrame) the dataframe containing a loss-data curve as returned
        by LossDataEstimator.compute_curve or LossDataEstimator.to_dataframe.
        The `samples`, `val_loss` and `name` columns are plotted.
    - ns: (list<num>) optional: the list of training set sizes to use for
        computing metrics; drawn as vertical rules after being passed through
        `_closest_valid_ns`. Defaults to no rules.
    - epsilons: (list<num>) optional: the settings of epsilon used for
        computing SDL and eSC; drawn as horizontal rules. Defaults to no
        rules.
    - save_path: (str) optional: a path (ending in .pdf or .png) to save the
        chart. saving requires the
        [`altair-saver`](https://github.com/altair-viz/altair_saver/) package
        and its dependencies; it is only imported when a path is given.
    Returns: an Altair chart. Note that this chart displays well in notebooks,
        so calling `render_curve(df)` without a save path will work well with
        Jupyter.
    """
    import altair as alt
    from . import altair_theme  # noqa: F401
    alt.data_transformers.disable_max_rows()

    # None stands in for "no rules"; avoids mutable default arguments.
    ns = [] if ns is None else ns
    epsilons = [] if epsilons is None else epsilons

    if len(ns) > 0:
        ns = _closest_valid_ns(df, ns)

    title = 'Loss-data curve'
    color_title = 'Representation'
    xscale = alt.Scale(type='log')
    yscale = alt.Scale(type='log')

    x_axis = alt.X('samples', scale=xscale, title='Dataset size')
    y_axis = alt.Y('mean(val_loss)', scale=yscale, title='Validation loss')

    # The line layer carries no legend; the point layer below owns it.
    line = alt.Chart(df, title=title).mark_line()
    line = line.encode(
        x=x_axis, y=y_axis,
        color=alt.Color('name:N', title=color_title, legend=None),
    )

    point = alt.Chart(df, title=title).mark_point(size=80, opacity=1)
    point = point.encode(
        x=x_axis, y=y_axis,
        color=alt.Color('name:N', title=color_title,),
        shape=alt.Shape('name:N', title=color_title),
        tooltip=['samples', 'name']
    )

    # One frame holds both rule sets: rows with `x` come from ns, rows with
    # `y` come from epsilons.
    rules_df = pd.concat([
        pd.DataFrame({'x': ns}),
        pd.DataFrame({'y': epsilons})
    ], sort=False)

    rule_x = alt.Chart(rules_df).mark_rule(strokeDash=[4, 4]).encode(x='x')
    rule_y = alt.Chart(rules_df).mark_rule(strokeDash=[4, 4]).encode(y='y')

    chart = alt.layer(rule_x, rule_y, line, point).resolve_scale(
        color='independent',
        shape='independent'
    )
    if save_path is not None:
        # Imported here so the optional dependency is only required when
        # the chart is actually saved.
        import altair_saver
        altair_saver.save(chart, save_path)
    return chart
Ejemplo n.º 20
0
def make_plot(infile):
    """Plot daily throughput with a 7-day rolling mean and save it as a PNG.

    Args:
        infile: Parquet file read through ``infra.pd.read_parquet``. The
            ``bytes``, ``day_bin`` and ``throughput_type`` columns are used.

    Prints the standard deviation and mean of the per-day GB totals and
    writes the chart to ``renders/bytes_per_week.png``; returns nothing.
    """
    throughput = infra.pd.read_parquet(infile)

    # Fix plotting scale
    throughput["GB"] = throughput["bytes"] / (1000**3)

    daily_totals = throughput.copy().groupby("day_bin").sum()
    print("Std dev", daily_totals["GB"].std())
    print("Mean", daily_totals["GB"].mean())

    # Generate a dense dataframe with all days and directions
    all_days = pd.DataFrame({
        "day_bin":
        pd.date_range(infra.constants.MIN_DATE,
                      infra.constants.MAX_DATE,
                      freq="1D")
    })
    all_types = pd.DataFrame({"throughput_type": ["Up", "Down", "Local"]},
                             dtype=object)
    dense_index = infra.pd.cartesian_product(all_days, all_types)

    # Left-join onto the dense index so missing (day, type) pairs appear
    # with zero traffic.
    throughput = dense_index.merge(
        throughput,
        how="left",
        left_on=["day_bin", "throughput_type"],
        right_on=["day_bin", "throughput_type"],
    )
    throughput = throughput.fillna(value={"bytes": 0, "GB": 0})

    # 7-day rolling mean per throughput type.
    by_day = throughput.set_index("day_bin").sort_index()
    rolling = by_day.groupby(["throughput_type"]).rolling(window="7D")
    throughput_windowed = rolling.mean().reset_index()

    # Work around vega-lite legend merging bug
    label_order = {
        "Down": 1,
        "Up": 2,
        "Local": 3,
    }

    def rank_labels(col):
        return col.map(lambda label: label_order[label])

    # Mergesort is stable, so rows keep their relative order within a type.
    throughput = throughput.sort_values(
        ["throughput_type"],
        key=rank_labels,
        kind="mergesort",
    )
    throughput_windowed = throughput_windowed.sort_values(
        ["throughput_type"],
        key=rank_labels,
        kind="mergesort",
    )

    def time_axis():
        # Same x encoding for both layers.
        return altair.X(
            "day_bin:T",
            title="Time",
            axis=altair.Axis(
                labelSeparation=5,
                labelOverlap="parity",
            ),
        )

    def volume_axis():
        # Same y encoding for both layers.
        return altair.Y("GB:Q", title="GB Total Per Day")

    shape_legend = altair.Legend(
        title="",
        orient="top-left",
        fillColor="white",
        labelLimit=500,
        padding=5,
        strokeColor="black",
        columns=3,
    )

    points = altair.Chart(throughput).mark_point(opacity=0.5).encode(
        x=time_axis(),
        y=volume_axis(),
        color=altair.Color("throughput_type", sort=None),
        shape=altair.Shape("throughput_type",
                           sort=None,
                           legend=shape_legend),
    )

    lines = altair.Chart(throughput_windowed).mark_line().encode(
        x=time_axis(),
        y=volume_axis(),
        color=altair.Color("throughput_type", sort=None, legend=None),
    )

    combined = (points + lines).resolve_scale(color='independent',
                                              shape='independent')
    combined.properties(width=500).save("renders/bytes_per_week.png",
                                        scale_factor=2)
Ejemplo n.º 21
0
                                      '#5218FA', '#FFD300', '#BD33A4',
                                      '#F8F8FF', '#B31B1B', '#E4717A',
                                      '#ED872D', '#ACE1AF', '#7BB661'
                                  ]),
                  legend=alt.Legend(title="Bloom Color"))
# Color encoding on the nominal `bloom_color` field with an explicit scale:
# each of the nine domain labels is paired, by position, with one fixed hex
# color. No legend argument is given here. The first ('Blue') and last
# ('None') labels share the same color, '#D6B08C'.
wcolor = alt.Color('bloom_color:N',
                   scale=alt.Scale(domain=[
                       'Blue', 'Yellow', 'Purple', 'White', 'Red', 'Pink',
                       'Orange', 'Green', 'None'
                   ],
                                   range=[
                                       '#D6B08C', '#CA8546', '#C9AA8D',
                                       '#BB6513', '#D7A36E', '#F2D4B7',
                                       '#A0856A', '#A97D51', '#D6B08C'
                                   ]))
# Shape encoding on the nominal `species` field.
shape = alt.Shape("species:N")


def format_plotdata(data, stats, Season):
    """
    creates a dataframe appropriate for plotting the desired density plan and section charts
    """
    seeddf = data.subdata.copy()

    #this for loop generates the plotting data
    choices = pd.DataFrame(
        (seeddf.index.values, seeddf["common_name"], seeddf["plants_per_yard"],
         seeddf["ht"], seeddf["bloom_color"], seeddf["spring"],
         seeddf["summer"], seeddf["autumn"]),
        index=None).T.rename(
            columns={
Ejemplo n.º 22
0
    'col': 2
}, {
    'country': 'United States',
    'animal': 'sheep',
    'col': 1
}])

# Category labels shared by the shape and color scales below; the entries of
# each `range` list are paired with these labels by position.
domains = ['person', 'cattle', 'pigs', 'sheep']

# Shape scale: one SVG path string per category, used as the marker outline.
shape_scale = alt.Scale(
    domain=domains,
    range=[
        'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z',
        'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z',
        'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z',
        'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z'
    ])

# Color scale: one fixed RGB color per category.
color_scale = alt.Scale(domain=domains,
                        range=[
                            'rgb(162,160,152)', 'rgb(194,81,64)',
                            'rgb(93,93,93)', 'rgb(91,131,149)'
                        ])

# Filled-point chart of `df`: `col` across, `animal` down, one row facet per
# `country`. Shape and color both follow `animal` through the scales above,
# with legends and axes hidden, full opacity and a constant marker size.
alt.Chart(df).mark_point(filled=True).encode(
    alt.X('col:O', axis=None), alt.Y('animal:O', axis=None),
    alt.Row('country:N', header=alt.Header(title='')),
    alt.Shape('animal:N', legend=None, scale=shape_scale),
    alt.Color('animal:N', legend=None, scale=color_scale), alt.OpacityValue(1),
    alt.SizeValue(200)).properties(width=800, height=200)
Ejemplo n.º 23
0
def make_plot(infile):
    """Render the active-users-per-hour charts from a parquet file.

    Reads ``infile`` and writes three PNG files under ``renders/``:

    * a boxplot of ``user`` per ``hour`` using only the rows whose
      ``day_bin`` is before "2019-07-30" or after "2019-08-31";
    * a point overplot of every row, coloured by an "Outage"/"Normal" label
      derived from that same ``day_bin`` test;
    * a line-plus-point chart of the per-hour mean, 90th percentile,
      99th percentile and max of ``user`` over the non-outage rows.

    The melted per-hour aggregate is also printed to stdout.

    Args:
        infile: Path passed to ``infra.pd.read_parquet``.
    """

    def hour_channel():
        # A fresh channel object per chart, as in a hand-written encoding.
        return alt.X('hour:O', title="Hour of the Day")

    def user_channel():
        return alt.Y('user:Q', title="Active User Count")

    flows = infra.pd.read_parquet(infile).reset_index()

    # True for rows outside the 2019-07-30 .. 2019-08-31 window.
    outside_window = ((flows["day_bin"] < "2019-07-30") |
                      (flows["day_bin"] > "2019-08-31"))

    # Taken before the "outage" column exists, so this subset does not
    # carry that column.
    normal_flows = flows.loc[outside_window]

    flows["outage"] = "Outage"
    flows.loc[outside_window, "outage"] = "Normal"

    boxplot = alt.Chart(normal_flows).mark_boxplot().encode(
        x=hour_channel(),
        y=user_channel(),
    )
    boxplot.save(
        "renders/users_per_time_of_day_boxplot_exclude_outage.png",
        scale_factor=2,
    )

    overplot = alt.Chart(flows).mark_point(opacity=0.1).encode(
        x=hour_channel(),
        y=user_channel(),
        color=alt.Color("outage", title="Condition"),
    )
    overplot.save(
        "renders/users_per_time_of_day_overplot.png",
        scale_factor=2,
    )

    hourly = normal_flows.groupby(["hour"]).agg({
        "user": [
            "mean",
            lambda s: s.quantile(0.90),
            lambda s: s.quantile(0.99),
            "max",
        ]
    }).reset_index()

    # Collapse the two-level column index into single space-joined names.
    hourly.columns = [
        ' '.join(levels).strip() for levels in hourly.columns.values
    ]
    friendly_names = {
        "user mean": "Mean",
        "user <lambda_0>": "90th Percentile",
        "user <lambda_1>": "99th Percentile",
        "user max": "Max",
    }
    hourly = hourly.rename(columns=friendly_names)

    # Long format: one row per (hour, statistic).
    hourly = hourly.melt(
        id_vars=["hour"],
        value_vars=["Max", "99th Percentile", "90th Percentile", "Mean"],
        var_name="type",
        value_name="user",
    )

    print(hourly)

    # Hybrid chart to fix a legend issue with line chart and shape: the line
    # layer hides its legend, the point layer supplies it.
    lines = alt.Chart(hourly).mark_line().encode(
        x=hour_channel(),
        y=user_channel(),
        color=alt.Color("type", legend=None, sort=None),
    )

    point_legend = alt.Legend(
        orient="top-left",
        fillColor="white",
        labelLimit=500,
        padding=5,
        strokeColor="black",
    )
    points = lines.mark_point(size=100).encode(
        x=hour_channel(),
        y=user_channel(),
        color=alt.Color("type", sort=None, legend=point_legend),
        shape=alt.Shape("type", title="", sort=None),
    )

    layered = alt.layer(points, lines).resolve_scale(
        color='independent', shape='independent')
    layered.save(
        "renders/users_per_time_of_day_lines.png",
        scale_factor=2,
    )
Ejemplo n.º 24
0
def test_convert_shape_fail_temporal(column):
    """Run ``convert`` on a point chart whose shape channel is ``column``.

    NOTE(review): the name suggests the conversion is expected to fail for
    temporal columns, but no xfail/raises marker is visible here -- confirm.
    """
    point_chart = alt.Chart(df).mark_point()
    convert(point_chart.encode(alt.Shape(column)))
Ejemplo n.º 25
0
def test_quantitative_shape():
    """Run ``convert`` on a point chart of ``df_quant`` shaped by 'shape'.

    The result of ``convert`` is not inspected; the test only exercises the
    call. NOTE(review): any expected-failure marker is not visible here.
    """
    point_chart = alt.Chart(df_quant).mark_point()
    convert(point_chart.encode(alt.Shape('shape')))
Ejemplo n.º 26
0
        y=alt.Y(
            my[plot]["y"][0],
            type=datatypes[my[plot]["y"][0]],
            axis=alt.Axis(title=my[plot]["y"][1], ),
            scale=alt.Scale(type=my[plot]["y"][2]),
        ),
        tooltip=[f"{my[plot]['x'][0]}", f"{my[plot]['y'][0]}", "product_name"],
        color=my_color,
    ).properties(width=1213,
                 height=750).interactive().add_selection(my_selection))

    # Optional channels: each is added only when `my[plot]` has an entry for
    # it. In every entry, index 0 is used as the field name and index 1 as the
    # legend/header title; the channel type is looked up in `datatypes` by
    # field name.
    if "shape" in my[plot]:
        shape = my[plot]["shape"][0]
        chart[plot] = chart[plot].encode(shape=alt.Shape(
            shape,
            type=datatypes[shape],
            legend=alt.Legend(title=my[plot]["shape"][1]),
        ))
    # Facet into columns by the configured field.
    if "column" in my[plot]:
        chart[plot] = chart[plot].encode(column=alt.Column(
            my[plot]["column"][0],
            type=datatypes[my[plot]["column"][0]],
            header=alt.Header(title=my[plot]["column"][1]),
        ))
    # Facet into rows by the configured field.
    if "row" in my[plot]:
        chart[plot] = chart[plot].encode(row=alt.Row(
            my[plot]["row"][0],
            type=datatypes[my[plot]["row"][0]],
            header=alt.Header(title=my[plot]["row"][1]),
        ))
Ejemplo n.º 27
0
def __draw_bubbles(
    plot_table,
    x_metric,
    y_metric,
    ref_group,
    scales,
    interactive_selection_group,
):
    """Build the bubble layers for one pair of metrics.

    Two charts are created over the same data and layered: filled point
    "centers" (shape-encoded by ``attribute_value``) and translucent circle
    "areas" (size-encoded by ``group_size``). Both are positioned by
    ``x_metric`` / ``y_metric`` and share the tooltip and colour encodings.

    Args:
        plot_table: DataFrame providing ``group_size``, ``attribute_value``,
            ``total_entities`` and the two metric columns. It is not
            modified; a deep copy of the needed columns is charted.
        x_metric: Name of the column plotted on x.
        y_metric: Name of the column plotted on y.
        ref_group: Not used in this function body.
        scales: Mapping read at keys "color", "shape", "x", "y" and
            "bubble_size".
        interactive_selection_group: Selection driving the conditional
            colour; marks that fail it get ``Bubble.color_faded``.

    Returns:
        The centers chart layered with the areas chart.
    """
    # Private copy limited to the columns the charts read.
    kept_columns = [
        "group_size",
        "attribute_value",
        "total_entities",
        x_metric,
        y_metric,
    ]
    bubble_data = plot_table[kept_columns].copy(deep=True)
    bubble_data["tooltip_group_size"] = plot_table.apply(
        lambda row: get_tooltip_text_group_size(
            row["group_size"], row["total_entities"]),
        axis=1,
    )

    # Selected groups keep their scale colour, the rest are faded.
    color = alt.condition(
        interactive_selection_group,
        alt.Color("attribute_value:N", scale=scales["color"], legend=None),
        alt.value(Bubble.color_faded),
    )

    tooltip = [
        alt.Tooltip(field="attribute_value", type="nominal", title="Group"),
        alt.Tooltip(field="tooltip_group_size",
                    type="nominal",
                    title="Group Size"),
    ]
    tooltip += [
        alt.Tooltip(field=metric,
                    type="quantitative",
                    format=".2f",
                    title=metric.upper())
        for metric in (x_metric, y_metric)
    ]

    def position_channels():
        # Fresh x/y channel objects for each layer.
        return {
            "x": alt.X(f"{x_metric}:Q", scale=scales["x"], axis=no_axis()),
            "y": alt.Y(f"{y_metric}:Q", scale=scales["y"], axis=no_axis()),
        }

    centers = alt.Chart(bubble_data).mark_point(
        filled=True, size=Bubble.center_size)
    centers = centers.encode(
        **position_channels(),
        tooltip=tooltip,
        color=color,
        shape=alt.Shape("attribute_value:N",
                        scale=scales["shape"],
                        legend=None),
    )

    areas = alt.Chart(bubble_data).mark_circle(opacity=Bubble.opacity)
    areas = areas.encode(
        size=alt.Size("group_size:Q",
                      legend=None,
                      scale=scales["bubble_size"]),
        **position_channels(),
        tooltip=tooltip,
        color=color,
    )

    return centers + areas
Ejemplo n.º 28
0
def __draw_bubbles(
    plot_table,
    metrics,
    ref_group,
    scales,
    selection,
):
    """Layer bubble centers and bubble areas for every metric.

    For each metric, a filled-point chart (shape-encoded by
    ``attribute_value``) and a circle chart (size-encoded by ``group_size``)
    are appended to two running layers. x is the metric value and y is a
    calculated constant holding the upper-cased metric name.

    Side effect: ``plot_table`` gains a ``tooltip_group_size`` column and one
    ``tooltip_disparity_explanation_<metric>`` column per metric, in place.

    Args:
        plot_table: DataFrame with ``group_size``, ``total_entities``,
            ``attribute_value``, and for each metric the ``<metric>`` and
            ``<metric>_disparity_scaled`` columns.
        metrics: Iterable of metric column names.
        ref_group: Forwarded to ``get_tooltip_text_disparity_explanation``.
        scales: Mapping read at keys "color", "shape", "x", "y" and
            "bubble_size".
        selection: Selection driving the conditional colour; marks that
            fail it get ``Bubble.color_faded``.

    Returns:
        The areas layer combined with the centers layer.
    """

    def group_size_text(row):
        return get_tooltip_text_group_size(
            row["group_size"], row["total_entities"]
        )

    # x axis that draws only gridlines at the quartile positions.
    gridline_axis = alt.Axis(
        values=[0.25, 0.5, 0.75],
        ticks=False,
        domain=False,
        labels=False,
        title=None,
    )

    def position_channels(metric):
        # Fresh x/y channel objects for each layer of each metric.
        return {
            "x": alt.X(f"{metric}:Q", scale=scales["x"], axis=gridline_axis),
            "y": alt.Y("metric_variable:N", scale=scales["y"], axis=no_axis()),
        }

    # Selected groups keep their scale colour, the rest are faded.
    color = alt.condition(
        selection,
        alt.Color("attribute_value:N", scale=scales["color"], legend=None),
        alt.value(Bubble.color_faded),
    )

    # Empty seed charts that the per-metric charts are layered onto.
    bubble_centers = alt.Chart().mark_point()
    bubble_areas = alt.Chart().mark_circle()

    plot_table["tooltip_group_size"] = plot_table.apply(group_size_text, axis=1)

    for metric in metrics:
        disparity_column = f"tooltip_disparity_explanation_{metric}"
        plot_table[disparity_column] = plot_table.apply(
            lambda row: get_tooltip_text_disparity_explanation(
                row[f"{metric}_disparity_scaled"],
                row["attribute_value"],
                metric,
                ref_group,
            ),
            axis=1,
        )

        tooltip = [
            alt.Tooltip(field="attribute_value", type="nominal", title="Group"),
            alt.Tooltip(
                field="tooltip_group_size", type="nominal", title="Group Size"
            ),
            alt.Tooltip(field=disparity_column, type="nominal", title="Disparity"),
            alt.Tooltip(
                field=f"{metric}",
                type="quantitative",
                format=".2f",
                title=f"{metric}".upper(),
            ),
        ]

        # Centers: one multi-selection per metric layer.
        centers_trigger = alt.selection_multi(
            empty="all", fields=["attribute_value"]
        )
        metric_centers = (
            alt.Chart(plot_table)
            .transform_calculate(metric_variable=f"'{metric.upper()}'")
            .mark_point(filled=True, size=Bubble.center_size)
            .encode(
                **position_channels(metric),
                tooltip=tooltip,
                color=color,
                shape=alt.Shape(
                    "attribute_value:N", scale=scales["shape"], legend=None
                ),
            )
            .add_selection(centers_trigger)
        )
        bubble_centers += metric_centers

        # Areas: same positions, sized by group size.
        areas_trigger = alt.selection_multi(
            empty="all", fields=["attribute_value"]
        )
        metric_areas = (
            alt.Chart(plot_table)
            .mark_circle(opacity=Bubble.opacity)
            .transform_calculate(metric_variable=f"'{metric.upper()}'")
            .encode(
                **position_channels(metric),
                tooltip=tooltip,
                color=color,
                size=alt.Size(
                    "group_size:Q", legend=None, scale=scales["bubble_size"]
                ),
            )
            .add_selection(areas_trigger)
        )
        bubble_areas += metric_areas

    return bubble_areas + bubble_centers
Ejemplo n.º 29
0
# KMeans with 5 clusters and a fixed random_state.
model = KMeans(n_clusters=5, random_state=2019)

# %%
# Fit on `data_kmeans` and shift the predicted labels up by one
# (presumably so clusters are numbered from 1 -- confirm).
labels = model.fit_predict(data_kmeans) + 1

# %%
print(labels)

# %%
# Store the labels as "Cluster <n>" strings, then attach the name/result/start
# columns taken from the `mask` rows of `data` (both defined outside this
# block).
data_kmeans["label"] = ("Cluster " + pd.Series((labels)).astype(str)).values
data_kmeans[["name", "result",
             "start"]] = data.loc[mask, ["name", "result", "start"]]

# %%
# Interactive scatter of allies vs. axis infantry: shape encodes the cluster
# label, colour encodes `result`, and the tooltip lists every column.
c = (alt.Chart(data_kmeans).mark_point().encode(
    shape=alt.Shape("label:N", legend=alt.Legend(title="Cluster")),
    x="allies_infantry",
    y="axis_infantry",
    color="result",
    tooltip=data_kmeans.columns.tolist(),
).interactive())

# Bare expression: shows the chart when run as a notebook cell.
c

# %%
# Drop the columns added above so only the original feature columns are
# scaled. NOTE(review): `scale` is imported elsewhere -- presumably
# sklearn.preprocessing.scale; confirm.
data_to_scale = data_kmeans.drop(["label", "name", "start", "result"], axis=1)
data_scaled = scale(data_to_scale)

# %%
# Refit the same model object on the scaled features.
labels_scaled = model.fit_predict(data_scaled) + 1
Ejemplo n.º 30
0
	
# Section: the `shape` encoding channel.
st.subheader("Shape")

# Bare string literal holding the section's explanatory text.
# NOTE(review): presumably rendered by Streamlit "magic" -- confirm it is
# enabled for this app. The text is runtime output, so edit it as content.
'''The `shape` encoding channel sets the geometric shape used by `point` marks. Unlike the other channels we have seen so far, the shape channel can not be used by other mark types. The shape encoding channel should only be used with nominal data, as perceptual rank-order and magnitude comparisons are not supported.

Let's encode the `cluster` field using shape as well as color. Using multiple channels for the same underlying data field is known as a redundant encoding. The resulting chart combines both color and shape information into a single symbol legend:
'''

# st.echo displays the source of the enclosed block in the app and then runs
# it, so anything written inside the block (comments included) is shown to
# the reader. Keep maintainer notes out here.
with st.echo():
	chart = alt.Chart(data2000).mark_point(filled=True).encode(
		alt.X('fertility:Q'),
		alt.Y('life_expect:Q'),
		alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])),
		alt.Color('cluster:N'),
		alt.OpacityValue(0.5),
		alt.Shape('cluster:N')
	)
	st.write(chart)
	
st.subheader("Tooltips and Ordering")

'''
By this point, you might feel a bit frustrated: we've built up a chart, but we still don't know what countries the visualized points correspond to! Let's add interactive tooltips to enable exploration.

The `tooltip` encoding channel determines tooltip text to show when a user moves the mouse cursor over a mark. Let's add a tooltip encoding for the `country` field, then investigate which countries are being represented.

	chart = alt.Chart(data2000).mark_point(filled=True).encode(
		alt.X('fertility:Q'),
		alt.Y('life_expect:Q'),
		alt.Size('pop:Q', scale=alt.Scale(range=[0,1000])),
		alt.Color('cluster:N'),