def display_scatter_matrix(assets, start_date, end_date, available_metrics,
                           id_info_map):
    """Write a log-scaled scatter matrix of asset metrics to the Streamlit page.

    The candidate metrics are fixed ("PriceUSD", "TxCnt", "VtyDayRet30d");
    only those also found in ``available_metrics`` are plotted. Each plotted
    metric is copied into a column named ``id_info_map[metric][0]`` so that
    the chart axes and tooltips use that name.

    Args:
        assets: Forwarded to ``get_aggregated_metrics``.
        start_date: Forwarded to ``filter_metrics_by_date``.
        end_date: Forwarded to ``filter_metrics_by_date``.
        available_metrics: Container tested with ``in`` for each candidate.
        id_info_map: Mapping from metric id to a sequence whose first item
            is used as the display column name.
    """
    candidate_metrics = ["PriceUSD", "TxCnt", "VtyDayRet30d"]
    plotted_metrics = [
        metric for metric in candidate_metrics if metric in available_metrics
    ]

    # NOTE(review): data is requested for every candidate metric, not only
    # the plotted ones -- confirm this is intentional.
    frame = get_aggregated_metrics(assets, candidate_metrics)
    frame = filter_metrics_by_date(frame, start_date, end_date)

    display_names = []
    for metric_id in plotted_metrics:
        label = id_info_map[metric_id][0]
        frame[label] = frame[metric_id]
        display_names.append(label)

    x_channel = alt.X(alt.repeat("column"),
                      type="quantitative",
                      scale=alt.Scale(type="log"))
    y_channel = alt.Y(alt.repeat("row"),
                      type="quantitative",
                      scale=alt.Scale(type="log"))

    panel = alt.Chart(frame).mark_circle().encode(
        x=x_channel,
        y=y_channel,
        color="Name:N",
        tooltip=display_names,
    ).properties(width=200, height=200)

    chart = panel.repeat(row=display_names, column=display_names)
    st.write(chart)
    def make_chart(self, df):
        """Build one geoshape map per case-count variable, stacked in rows.

        The shapes returned by ``self.geography()`` are joined to ``df`` by
        matching ``properties.NAME`` against the ``Municipio`` column; shapes
        without a match get the lookup default ``'0'``. Each row has its own
        (independent) color scale.
        """
        variables = [
            'Casos nuevos (último boletín)',
            'Casos nuevos (últimos 7 boletines)'
        ]
        shapes = self.geography()

        joined = alt.Chart(shapes).transform_lookup(
            lookup='properties.NAME',
            from_=alt.LookupData(df, 'Municipio', variables),
            default='0')

        color_scale = alt.Scale(
            type='symlog',
            scheme='redgrey',
            domainMid=0,
            # WORKAROUND: Set the domain manually to forcibly
            # include zero or else we run into
            # https://github.com/vega/vega-lite/issues/6544
            domain=alt.DomainUnionWith(unionWith=[0]))
        color_legend = alt.Legend(orient='left',
                                  titleLimit=400,
                                  titleOrient='left')
        color_channel = alt.Color(alt.repeat('row'),
                                  type='quantitative',
                                  sort="descending",
                                  scale=color_scale,
                                  legend=color_legend)
        tooltips = [
            alt.Tooltip(field='properties.NAME', type='nominal'),
            alt.Tooltip(alt.repeat('row'), type='quantitative')
        ]

        panel = joined.mark_geoshape().encode(
            color=color_channel,
            tooltip=tooltips).properties(width=575, height=200)

        stacked = panel.repeat(row=variables).resolve_scale(
            color='independent')
        return stacked.configure_view(strokeWidth=0).configure_concat(
            spacing=80)
def get_chart_data(data):
    """Render a 3x3 scatter matrix of ``data[CHART]`` through Streamlit.

    Points are colored by the nominal ``Promo`` field and ``.interactive()``
    is applied to the repeated chart. Returns whatever ``st.altair_chart``
    returns.
    """
    row_fields = ['DayOfWeek', 'Customers', 'Sales']
    column_fields = ['Sales', 'Customers', 'DayOfWeek']

    panel = alt.Chart(data[CHART]).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color='Promo:N',
    ).properties(width=150, height=150)

    matrix = panel.repeat(row=row_fields, column=column_fields).interactive()
    return st.altair_chart(matrix, width=-1)
def show_genre_region_bar(df):
    """Show per-region genre sales bars above a regional sales scatter matrix.

    Writes the section heading and usage hints, lets the user pick a year
    range with a slider, filters ``df`` on its ``Year`` column, and writes a
    vertically concatenated chart. Both charts share one multi-selection on
    the color encoding, so picking genres in one recolors the other.
    """
    st.write('## Popular Genres')
    st.write("**What about genres? Let's compare!**")

    hints = (
        "💡 *You can select specific year range with slider*",
        "💡 *Try clicking on one or more genres to see how they perform in different regions*",
        "💡 *You can also click on the individual points in the scatterplot to see other games in its genre*",
        "⌛ *Since it takes time to load the transformed data, the response may take a few seconds. Please be patient.*",
    )
    for hint in hints:
        st.write(hint)

    brush = alt.selection_multi(encodings=['color'])

    first_year, last_year = get_year_range(df)
    selected_year = st.slider("View Popular Genre in Year Range", first_year,
                              last_year, (first_year, last_year), 1)

    in_range = (df["Year"] >= selected_year[0]) & (df["Year"] <=
                                                   selected_year[1])
    new_df = df[in_range]

    # Unselected marks fall back to light grey; selected ones use the genre.
    color_scheme = alt.condition(brush,
                                 "Genre:N",
                                 alt.value('lightgrey'),
                                 scale=alt.Scale(scheme='tableau20'))

    summed_sales = alt.Y(alt.repeat('row'),
                         aggregate='sum',
                         type='quantitative')
    hist = alt.Chart(new_df).mark_bar().encode(y=summed_sales)

    bar_rows = [
        'Global_Sales', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'
    ]
    bar_layer = alt.layer(
        hist.encode(x=alt.X("Genre:N", sort='-y'), color=color_scheme))
    top_chart = bar_layer.properties(width=500, height=200).repeat(
        row=bar_rows, data=new_df).add_selection(brush)

    scatter_rows = ["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"]
    scatter_columns = ["NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales"]
    scatter_panel = alt.Chart(new_df).mark_point().encode(
        x=alt.X(alt.repeat("column"),
                type='quantitative',
                scale=alt.Scale(type='sqrt')),
        y=alt.Y(alt.repeat("row"),
                type='quantitative',
                scale=alt.Scale(type='sqrt')),
        color=color_scheme,
        tooltip=['Name'],
    ).properties(width=150, height=150)
    bottom_chart = scatter_panel.repeat(
        row=scatter_rows, column=scatter_columns).add_selection(brush)

    st.write(alt.vconcat(top_chart, bottom_chart))
Beispiel #5
0
def eda(df, target):
    """
    Generates a dictionary of summary statistics and a pair plot for a
    data frame.

    Parameters
    --------
    df : pandas.DataFrame
        input dataframe to be analyzed
    target : string
        target column name

    Returns
    --------
    dict
        summary of the given data frame with the keys
        ``nb_missing_values`` (list of ``(column, count)`` tuples),
        ``nb_cat_features``, ``cat_features_name``, ``nb_num_features``,
        ``num_features_name``, ``nb_class``, ``class_ratio`` and
        ``pairplot`` (an Altair repeated scatter chart of the numeric
        features, colored by the target).

    Raises
    --------
    TypeError
        if ``df`` is not a pandas DataFrame.

    Examples
    --------
    >>> from propropy import eda
    >>> url1 = "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    >>> url2 = "wine-quality/winequality-red.csv"
    >>> url = url1+url2
    >>> df = pd.read_csv(url, sep=";")
    >>> target = "quality"
    >>> res = eda(df, target)
    """
    # Check the dataframe input
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Input data must be an instance of DataFrame")
    res = {}

    # Feature frame = everything except the target. The keyword form is
    # required: the positional ``axis`` argument was removed in pandas 2.0.
    df_fea = df.drop(columns=target)
    num_fea = df_fea.select_dtypes("number").columns.to_list()
    # Keep the original column order instead of the arbitrary order a set
    # difference would give.
    numeric_names = set(num_fea)
    cat_fea = [col for col in df_fea.columns if col not in numeric_names]

    # Count missing values once and reuse the result for names and counts.
    null_counts = df_fea.isnull().sum()
    res["nb_missing_values"] = list(
        zip(list(null_counts.index), list(null_counts.values)))
    res["nb_cat_features"] = len(cat_fea)
    res["cat_features_name"] = cat_fea
    res["nb_num_features"] = len(num_fea)
    res["num_features_name"] = num_fea
    res["nb_class"] = len(set(df[target]))
    class_count = df[target].value_counts(normalize=True).values
    res["class_ratio"] = list(class_count.round(4))

    # Create a pair plot with Altair: every numeric feature against every
    # other, colored by the (nominal) target.
    color_lab = target + ":N"
    chart = (alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color=color_lab,
    ).properties(width=100, height=100).repeat(row=num_fea, column=num_fea))
    res["pairplot"] = chart
    return res
Beispiel #6
0
def pairplot(data, vars=None):
    """Return an interactive scatter matrix of ``data``.

    ``vars`` lists the fields used for both rows and columns; when it is
    ``None`` every column of ``data`` is used. Points are colored by the
    nominal ``Origin`` field.
    """
    fields = list(data.columns) if vars is None else vars

    panel = alt.Chart(data).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color="Origin:N",
    ).properties(width=300, height=300)

    return panel.repeat(row=fields, column=fields).interactive()
Beispiel #7
0
def plot_scatter_matrix(df, color_field, x_y_prefix, tooltip_fields, size):
    """Return a scatter matrix over the columns whose names match a pattern.

    Columns are selected with ``df.columns.str.match(x_y_prefix)``. Rows use
    the matching columns in order and columns use them reversed. ``size``
    sets both width and height of each panel.
    """
    matching = df.columns[df.columns.str.match(x_y_prefix)]
    row_fields = list(matching)
    column_fields = list(matching[::-1])

    panel = alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color=format(color_field),
        tooltip=tooltip_fields,
    ).properties(width=size, height=size)

    return panel.repeat(row=row_fields, column=column_fields)
Beispiel #8
0
def graphMultiSimStats(simList):
    """Display a scatter grid of the statistics from ``multiSimStats``.

    Rows are all columns of the result except the last three; columns are
    the fixed fields ``FirstHPayoff`` and ``FirstLPayoff``.
    """
    scores = multiSimStats(simList)
    row_fields = list(scores.columns[:-3].values)
    column_fields = ['FirstHPayoff', 'FirstLPayoff']

    panel = alt.Chart(scores).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
    ).properties(width=250, height=250)

    display(panel.repeat(row=row_fields, column=column_fields))
Beispiel #9
0
def vega_example3():
    """Return the JSON response for a brushable 2x2 scatter grid of ``cars``.

    An interval selection colors selected points by ``Origin`` and the rest
    gray. The chart is serialized with ``to_dict`` and passed to ``jsonify``.
    """
    selection = alt.selection_interval()
    point_color = alt.condition(selection, "Origin:N", alt.value("gray"))

    panel = alt.Chart(cars).mark_point().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color=point_color,
    )
    panel = panel.add_selection(selection).properties(width=250, height=250)

    grid = panel.repeat(
        row=["Horsepower", "Miles_per_Gallon"],
        column=["Acceleration", "Displacement"],
    )
    return jsonify(grid.to_dict())
Beispiel #10
0
def example_scatterplot_matrix():
    """Return an interactive 4x4 scatter matrix of the iris dataset.

    The data comes from ``vega_datasets``; points are colored by the nominal
    ``species`` field.
    """
    import altair as alt
    from vega_datasets import data

    iris = data.iris()
    row_fields = ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']
    column_fields = ['sepalLength', 'sepalWidth', 'petalLength', 'petalWidth']

    panel = alt.Chart(iris).mark_circle().encode(
        x=alt.X(alt.repeat('column'), type='quantitative'),
        y=alt.Y(alt.repeat('row'), type='quantitative'),
        color='species:N',
    ).properties(width=150, height=150)

    return panel.repeat(row=row_fields, column=column_fields).interactive()
def scatter_matrix(df, x, z, output_directory=None, custom_scheme='dark2'):
    """Plot an interactive scatter matrix of the variables listed in ``x``.

    Requires altair (and altair saver when saving).

    Args:
        df: Data passed to ``alt.Chart``.
        x: List of field names. Rows use ``x`` in order, columns use it
            reversed.
        z: Name of a factor that sets the point color, or ``None`` to draw
            all points without a color encoding.
        output_directory: When not ``None``, the chart is saved as
            ``output_directory + 'Scatterplot Matrix.html'``. The value is
            used as a plain string prefix, so it should end with a path
            separator.
        custom_scheme: Color scheme name used for ``z``.

    Returns:
        The repeated, interactive chart.
    """
    chart = alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'))

    # The color factor is documented as optional; previously ``None`` crashed
    # on the string concatenation below.
    if z is not None:
        chart = chart.encode(
            color=alt.Color(z + ':N', scale=alt.Scale(scheme=custom_scheme)))

    chart = chart.properties(width=150, height=150).repeat(
        row=x, column=x[::-1]).interactive()

    if output_directory is not None:
        chart.save(output_directory + 'Scatterplot Matrix.html')
    return chart
Beispiel #12
0
def pairplot(data, hue=None, vars=None):
    """Return a scatter matrix of ``data``, optionally colored by ``hue``.

    Args:
        data: Data passed to ``alt.Chart``.
        hue: Name of a nominal field used for point color. When ``None``
            (the default) no color encoding is added; previously this
            produced an encoding on the non-existent field ``"None"``.
        vars: Fields used for both rows and columns. Defaults to every
            column of ``data``.

    Returns:
        The repeated chart.
    """
    if vars is None:
        vars = list(data.columns)

    chart = alt.Chart(data).mark_circle().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'))

    if hue is not None:
        chart = chart.encode(color='{hue}:N'.format(hue=hue))

    return chart.properties(width=250, height=250).repeat(row=vars,
                                                          column=vars)
Beispiel #13
0
def generate_align_vs_features(data_frame, output_folder, file_name):
    """
    Generates a chart of the relation between align and other features in
    the dataset and saves it as a PNG under ``<output_folder>/figures/``.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot
    output_folder : str
        output folder path to save the chart
    file_name : str
        file name (without extension) for the generated chart image

    Returns:
    -----------
    None
    """
    feature_names = ['id', 'eye', 'hair', 'sex', 'gsm', 'publisher']

    # One panel per feature, laid out three per line; both circle size and
    # x position show the record count.
    panel = alt.Chart(data_frame).mark_circle().encode(
        alt.Y(alt.repeat(), type='ordinal'),
        alt.X('count()', title="Character Count"),
        size=alt.Size('count()', legend=alt.Legend(title="Characters")),
        color=alt.Color("align", legend=alt.Legend(title="Alignment")),
    ).properties(height=300, width=200)
    align_vs_features = panel.repeat(repeat=feature_names, columns=3)

    target_path = output_folder + "/figures/" + file_name + '.png'
    save(align_vs_features, target_path, method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs features chart created, saved to " + target_path)
Beispiel #14
0
def make_cat_plot(dat, cat_list):
    """
    Plot the record count of each categorical feature, one row per feature.

    Parameters
    ----------
    dat : DataFrame
        input dataset
    cat_list : list
        list of strings containing the feature names

    Returns
    -------
    altair chart
        repeated bar chart, colored by the ``y`` field without a legend

    Examples
    --------
        make_cat_plot(bank_train, ['job', 'default'])

    """
    bars = alt.Chart(dat).mark_bar(opacity=0.8).encode(
        alt.X("count()"),
        alt.Y(alt.repeat("row"), type='nominal'),
        color=alt.Color('y', legend=None),
    )
    sized = bars.properties(width=200, height=150)
    return sized.repeat(row=cat_list)
Beispiel #15
0
def create_map_series(df):
    """Write a row of three error maps as a Vega-Lite JSON file.

    ``df`` is averaged per ``context`` and joined to the grid cells by
    ``properties.id``. The output path is ``INPUT`` with ``.csv`` replaced
    by ``_<SHIPTYPE>_map_series.vega``.
    """
    # Note that a LayeredChart cannot contain faceted charts as its elements.
    # Therefore, we cannot use a basemap in this configuration, since it would produce an invalid specification.
    # More info: https://github.com/altair-viz/altair/issues/785

    per_context = df.groupby('context').mean()
    # Expose the group key as a regular column so the lookup can match on it.
    per_context['context'] = per_context.index
    print(per_context)

    grid = alt.topo_feature(
        'https://raw.githubusercontent.com/anitagraser/sandbox/master/grid40.topojson',
        'grid40')
    variable_list = [
        'distance_error', 'along_track_error', 'cross_track_error'
    ]

    cells = alt.Chart(grid, title=SHIPTYPE).mark_geoshape(stroke='white',
                                                          strokeWidth=2)
    colored = cells.encode(
        alt.Color(alt.repeat('column'),
                  type='quantitative',
                  scale=alt.Scale(domain=(0, 2000))))
    joined = colored.transform_lookup(
        lookup='properties.id',
        from_=alt.LookupData(per_context, 'context', variable_list))
    panel = joined.project(type='mercator').properties(width=300, height=200)
    map_chart = panel.repeat(column=variable_list).resolve_scale(
        color='shared')

    output_path = INPUT.replace('.csv',
                                '_{}_map_series.vega'.format(SHIPTYPE))
    with open(output_path, 'w') as map_output:
        map_output.write(map_chart.to_json(indent=2))
Beispiel #16
0
def j_summary(samples, ctype='hist', properties={'width': 800}):
    """Print a summary of ``samples`` and display their distributions.

    Calls ``print_summary`` (with 0.89 and ``False``), displays the
    correlation matrix, then displays one chart row per column.

    Args:
        samples: Either a dict (converted to a DataFrame and passed through
            ``clean_names``) or an object with ``columns`` and ``T.values``
            that is used as the data frame directly.
        ctype: ``'hist'`` for binned histograms or ``'density'`` for density
            lines.
        properties: Currently unused. NOTE(review): the default is a mutable
            dict; it is never mutated here, so it is left as-is to keep the
            signature unchanged.

    Raises:
        ValueError: If ``ctype`` is neither ``'hist'`` nor ``'density'``.
    """
    # Validate up front: previously an unknown ctype printed the summary and
    # then failed on an unbound ``return_chart``.
    if ctype not in ('density', 'hist'):
        raise ValueError(
            "ctype must be 'hist' or 'density', got {!r}".format(ctype))

    # isinstance also accepts dict subclasses such as OrderedDict.
    if isinstance(samples, dict):
        print_summary(samples, 0.89, False)
        df = pd.DataFrame(samples).clean_names()
    else:
        print_summary(dict(zip(samples.columns, samples.T.values)), 0.89,
                      False)
        df = samples
    display(df.corr())

    # Frames with 5000 or more rows are downsampled to 4000 rows.
    df = df if len(df) < 5000 else df.sample(n=4000)
    base = alt.Chart(df).properties(height=30)

    if ctype == 'density':
        rows = [
            base.mark_line().transform_density(
                row,
                as_=[row, 'density'],
            ).encode(alt.X(f'{row}:Q'), alt.Y('density:Q'))
            for row in df.columns
        ]
        return_chart = alt.vconcat(*rows)
    else:
        return_chart = base.mark_bar().encode(
            alt.X(bin=alt.Bin(maxbins=20),
                  field=alt.repeat("row"),
                  type='quantitative'),
            y=alt.Y(title=None, aggregate='count',
                    type='quantitative')).repeat(row=list(df.columns))

    display(return_chart)
Beispiel #17
0
def draw_numeric_plot(train_df):
    """Draw binned count area plots for the numeric columns.

    The first numeric column is skipped; the remaining ones are laid out
    four panels per line, filled by ``good_wine``.

    Args:
        train_df (pd.DataFrame): train data split as a pandas dataframe

    Returns:
        alt.RepeatChart: plot object of numeric plot, repeated for each numeric column
    """
    numeric_part = train_df.select_dtypes(include=np.number)
    num_cols = list(numeric_part.iloc[:, 1:].columns)

    binned_x = alt.X(alt.repeat("repeat"),
                     type='quantitative',
                     scale=alt.Scale(zero=False),
                     bin=alt.Bin(maxbins=100))
    counts_y = alt.Y('count()', stack=None)

    panel = alt.Chart(train_df).mark_area(
        opacity=0.5, interpolate='monotone').encode(binned_x,
                                                    counts_y,
                                                    fill='good_wine')
    panel = panel.properties(height=200, width=200)

    repeated = panel.repeat(repeat=num_cols, columns=4)
    return repeated.configure_axis(labels=False)
def step4_related_factors():
    """Render the "Step4" section: factor maps next to a scatter matrix.

    Writes the header and tips, lets the user pick indicators with a
    multiselect, and writes a horizontal concatenation of the chart from
    ``world_map_for_factors`` and a scatter matrix of the chosen indicators.
    The scatter matrix is filtered by a year selection bound to a
    1991-2011 slider.
    """
    st.header("Step4: Factors that may affect CO2 emissions")
    for line in (
            "Tips:",
            "1. Add indicators you want to compare with CO2 emissions!",
            "2. Put your mouse on a country and compare across indicators!",
            "3. Remember to play with the year slide bar :)",
    ):
        st.write(line)

    year_slider = alt.binding_range(min=1991, max=2011, step=1)
    select_year = alt.selection_single(name="Year",
                                       fields=['Year'],
                                       bind=year_slider,
                                       init={'Year': 2011})
    # The hover selection is only handed to world_map_for_factors; it is not
    # attached to the scatter matrix built below.
    highlight = alt.selection_single(on='mouseover',
                                     fields=['Country Name'],
                                     empty='all')

    factor_options = [
        "CO2 emissions per GDP", "CO2 emissions (kt)",
        "CO2 emissions per capita", "Urban population (% of total)",
        "Renewable energy consumption (% of total final energy consumption)",
        "Forest area (% of land area)",
        "Marine protected areas (% of territorial waters)",
        "Population growth (annual %)",
        "Renewable electricity output % of total",
        "Terrestrial protected areas % of total",
        "Total greenhouse gas emissions (kt of CO2 equivalent)"
    ]
    default_factors = [
        "CO2 emissions (kt)", "CO2 emissions per GDP",
        "Renewable electricity output % of total"
    ]
    dataset2 = st.multiselect("Choose factors to compare!", factor_options,
                              default_factors)

    factor_map = world_map_for_factors(highlight, dataset2, select_year)

    panel = alt.Chart(df).mark_point().encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color='Country Name:N',
    ).properties(
        width=160,
        height=160,
    )
    scatter_grid = panel.repeat(
        row=dataset2,
        column=dataset2,
    ).transform_filter(select_year).interactive()

    st.write(alt.hconcat(factor_map, scatter_grid))
Beispiel #19
0
def scatter_matrix():
    """Show an interactive 3x3 scatter matrix of the module-level ``df``.

    Rows are duration, views, comments; columns are the same fields in
    reverse order. Points are colored by ``published_day``.
    """
    row_fields = ["duration", "views", "comments"]
    column_fields = ["comments", "views", "duration"]

    panel = alt.Chart(df).mark_circle().encode(
        alt.X(alt.repeat("column"), type="quantitative"),
        alt.Y(alt.repeat("row"), type="quantitative"),
        color="published_day",
    )
    panel = panel.properties(width=150, height=150)

    grid = panel.repeat(row=row_fields, column=column_fields).interactive()
    st.altair_chart(grid)
Beispiel #20
0
def scatter_matrix(df,
                   color: Union[str, None] = None,
                   alpha: float = 1.0,
                   tooltip: Union[List[str], tooltipList, None] = None,
                   **kwargs) -> alt.Chart:
    """ plots a scatter matrix

    At the moment supports neither histogram nor kde on the diagonal;
    uses feature-vs-feature scatterplots instead. Interactive and with a
    customizable tooltip.

    Parameters
    ----------
    df : DataFrame
        DataFrame to be used for scatterplot. Only numeric columns will be included.
    color : string [optional]
        Can be a column name or specific color value (hex, webcolors).
    alpha : float
        Opacity of the markers, within [0,1]
    tooltip: list [optional]
        List of specific column names or alt.Tooltip objects. If none (default),
        will show all columns.
    **kwargs
        ``colormap`` is used as the color scheme name when ``color`` is a
        column; other keys are ignored here.

    Returns
    -------
    The repeated, interactive Altair chart.
    """
    dfc = _preprocess_data(df)
    tooltip = _process_tooltip(tooltip) or dfc.columns.tolist()
    cols = dfc._get_numeric_data().columns.tolist()

    chart = (alt.Chart(dfc).mark_circle().encode(
        x=alt.X(alt.repeat("column"), type="quantitative"),
        # The y channel must be built with alt.Y; it was alt.X before.
        y=alt.Y(alt.repeat("row"), type="quantitative"),
        opacity=alt.value(alpha),
        tooltip=tooltip,
    ).properties(width=150, height=150))

    if color:
        color = str(color)

        if color in dfc:
            # A column name: encode by field, optionally with a named scheme.
            color = alt.Color(color)
            if "colormap" in kwargs:
                color.scale = alt.Scale(scheme=kwargs["colormap"])
        else:
            # Anything else is treated as a literal color value.
            color = alt.value(color)
        chart = chart.encode(color=color)

    return chart.repeat(row=cols, column=cols).interactive()
Beispiel #21
0
def omniplot(folder="./Cifar10/ResNet101/exp1/students"):
    """Return a log-log scatter matrix of the metrics loaded from ``folder``.

    The data comes from ``load_data(folder)``. Seven metric fields are
    plotted against each other; point shape encodes ``student`` and color
    encodes ``distillation``.
    """
    data = load_data(folder)

    detalle = [
        'test_acc', 'test_loss', 'test_eval', 'train_acc', 'train_loss',
        'train_eval', 'temp'
    ]

    x_channel = alt.X(alt.repeat("column"),
                      type='quantitative',
                      scale=alt.Scale(zero=False, base=10, type='log'))
    y_channel = alt.Y(alt.repeat("row"),
                      type='quantitative',
                      scale=alt.Scale(zero=False, base=10, type='log'))

    panel = alt.Chart(data).mark_point().encode(x_channel,
                                                y_channel,
                                                shape='student',
                                                color='distillation')
    panel = panel.properties(width=150, height=150)
    return panel.repeat(row=detalle, column=detalle)
Beispiel #22
0
def create_text(lines, nearest):
    """
    Component of Altair plot creation.
    Derive a text layer from ``lines``: where ``nearest`` is selected the
    label shows the repeated row field, otherwise a single space.
    """
    row_value = alt.Y(alt.repeat("row"), type='quantitative')
    label = alt.condition(nearest, row_value, alt.value(' '))
    # Offset the label up and to the right of its anchor point.
    return lines.mark_text(align='left', dx=5, dy=-5).encode(text=label)
Beispiel #23
0
 def hist_frame(self, column=None, layout=(-1, 2), **kwargs):
     """Return one binned histogram per numeric column as a repeated chart.

     ``column`` may be a single name or a list of names and is forwarded as
     ``usecols``. The number of panels per line comes from ``_get_layout``.
     ``kwargs`` are handed to ``self._get_mark_def``.
     """
     if isinstance(column, str):
         column = [column]

     numeric = self._preprocess_data(with_index=False,
                                     usecols=column)._get_numeric_data()
     _, ncols = _get_layout(numeric.shape[1], layout)

     panel = alt.Chart(numeric, mark=self._get_mark_def("bar", kwargs)).encode(
         x=alt.X(alt.repeat("repeat"), type="quantitative", bin=True),
         y=alt.Y("count()", title="Frequency"),
     )
     return panel.repeat(repeat=list(numeric.columns), columns=ncols)
Beispiel #24
0
def multiscatter(data,
                 columns=None,
                 group_by=None,
                 color=None,
                 tooltip=None,
                 height=300,
                 width=400):
    """Generate many scatter plots.

    Based on several columns, pairwise. ``height`` and ``width`` are the
    totals for the grid; each panel gets an equal share.

    Args:
        data: Data passed to ``alt.Chart``.
        columns: Fields to plot against each other. Defaults to every
            column of ``data`` (which must then expose ``.columns``).
        group_by: Must be ``None``; long format is not supported yet.
        color: Optional color encoding, forwarded to ``encode``.
        tooltip: Optional tooltip encoding, forwarded to ``encode``.
        height: Total height shared by all rows.
        width: Total width shared by all columns.

    Raises:
        NotImplementedError: If ``group_by`` is given.
        ValueError: If ``columns`` is empty.
    """
    # Explicit raise instead of assert so the check survives ``python -O``.
    if group_by is not None:
        raise NotImplementedError("Long format not supported yet")

    kwargs = choose_kwargs(from_=locals(), which=["color", "tooltip"])

    # The default used to crash on len(None).
    if columns is None:
        columns = list(data.columns)
    n_columns = len(columns)
    if n_columns == 0:
        raise ValueError("columns must contain at least one field")

    panel = alt.Chart(data,
                      height=height / n_columns,
                      width=width / n_columns).mark_point(size=1 / n_columns)
    return panel.encode(alt.X(alt.repeat("column"), type="quantitative"),
                        alt.Y(alt.repeat("row"), type="quantitative"),
                        **kwargs).repeat(row=columns, column=columns)
def app():
    """Run the Streamlit basics demo page.

    Shows markdown, the head of the cars dataset, a brushable repeated
    scatter chart, and a random scatter plot whose point count comes from a
    text input.
    """
    st.title("Let's demo Streamlit basics")

    sidebar = st.sidebar
    sidebar.markdown("**Some tools we'll use:**")
    sidebar.markdown("""
        * Markdown
        * Pandas
        * Altair""")

    st.markdown("""## Markdown and Pandas
I am writing Markdown but I can also show a pandas DataFrame.
  """)

    df_cars = data.cars()
    st.write(df_cars.head())

    st.markdown("""## Altair
And I can easily make interactive altair plots.
  """)

    # Brushed points keep their Origin color and higher opacity; the rest
    # fade to light gray.
    brush = alt.selection_interval(encodings=['x', 'y'])
    point_color = alt.condition(brush, 'Origin:N', alt.value('lightgray'))
    point_opacity = alt.condition(brush, alt.value(0.7), alt.value(0.1))

    panel = alt.Chart(df_cars).mark_point().encode(
        alt.X(alt.repeat('column'), type='quantitative'),
        alt.Y('Miles_per_Gallon:Q'),
        color=point_color,
        opacity=point_opacity,
    ).properties(width=150, height=150).add_selection(brush)
    repeat_chart = panel.repeat(
        column=['Weight_in_lbs', 'Acceleration', 'Horsepower'])

    st.write(repeat_chart)

    st.markdown("""## User Input
I can create a text input field to get input from the user.
  """)

    n = int(st.text_input("How many points do you want plotted:", '100'))

    # Draw order (x, then y, then size) is kept so seeded runs match.
    x = np.random.random(n) * 10
    y = np.random.random(n) * 10
    s = np.random.random(n)
    df = pd.DataFrame({'x': x, 'y': y, 'size': s})

    points = alt.Chart(df, width=400, height=400).mark_point().encode(
        x='x',
        y='y',
        size=alt.Size('size', legend=None),
        tooltip=['size'],
    )
    st.write(points.interactive())
Beispiel #26
0
def correlation_scatter_charts(d: pd.DataFrame, title: str = '') -> alt.Chart:
  """Produces a grid of scatter plots of runtimes of stages versus covariates.

  Columns of the grid are 'total runtime' followed by RUNTIME_COLUMNS; rows
  are COUNT_COLUMNS. X axis labels get an 's' suffix.

  Args:
    d: A pandas dataframe of runtime by regions.
    title: A title for the plot.

  Returns:
    An altair chart
  """
  runtime_fields = ['total runtime'] + RUNTIME_COLUMNS
  # Restrict the frame to the columns the chart actually uses.
  d = d[['region', 'total runtime'] + RUNTIME_COLUMNS + COUNT_COLUMNS]

  x_channel = alt.X(alt.repeat('column'), type='quantitative',
                    axis=alt.Axis(labelExpr="datum.value + 's'"))
  y_channel = alt.Y(alt.repeat('row'), type='quantitative')

  panel = alt.Chart(d).mark_circle(opacity=0.1).encode(
      x=x_channel,
      y=y_channel,
      tooltip='region',
  ).properties(width=100, height=100)

  grid = panel.repeat(column=runtime_fields, row=COUNT_COLUMNS)
  return grid.properties(title=title)
Beispiel #27
0
def create_lines(source, datetime_col="Time", logger_col="Location"):
    """
    Component of Altair plot creation.
    Build the line layer: ``datetime_col`` on a temporal x axis, the
    repeated row field on a y axis that does not force zero, and one color
    per nominal ``logger_col`` value.
    """
    x_channel = alt.X(datetime_col, type='temporal')
    y_channel = alt.Y(alt.repeat("row"),
                      type='quantitative',
                      scale=alt.Scale(zero=False))
    return alt.Chart(source).mark_line().encode(
        x_channel,
        y_channel,
        color=f'{logger_col}:N',
    )
Beispiel #28
0
def app():
    """Run the 202101 Streamlit demo page.

    Shows markdown, the head of the cars dataset, a brushable repeated
    scatter chart, and a random scatter plot whose point count comes from a
    text input.
    """
    st.title("Streamlit demo for 202101")

    st.markdown(""" ## Some section
    This is great!
    * bullet 1
    * bullet 2
    """)

    st.sidebar.markdown('''This is the sidebar''')

    df_cars = data.cars()
    st.write(df_cars.head())

    # Brushed points keep their Origin color and higher opacity; the rest
    # fade to light gray.
    brush = alt.selection_interval(encodings=['x', 'y'])
    point_color = alt.condition(brush, 'Origin:N', alt.value('lightgray'))
    point_opacity = alt.condition(brush, alt.value(0.7), alt.value(0.1))

    panel = alt.Chart(df_cars).mark_point().encode(
        alt.X(alt.repeat('column'), type='quantitative'),
        alt.Y('Miles_per_Gallon:Q'),
        color=point_color,
        opacity=point_opacity,
    ).properties(width=150, height=150).add_selection(brush)
    repeat_chart = panel.repeat(
        column=['Weight_in_lbs', 'Acceleration', 'Horsepower'])

    st.write(repeat_chart)

    st.markdown(""" ## User Input Demo
    """)

    n = int(st.text_input('How many point do you want', '100'))

    # Draw order (x, then y, then size) is kept so seeded runs match.
    x = np.random.random(n) * 10
    y = np.random.random(n) * 10
    s = np.random.random(n)
    df = pd.DataFrame({'x': x, 'y': y, 'size': s})

    points = alt.Chart(df, width=400, height=400).mark_point().encode(
        x='x',
        y='y',
        size=alt.Size('size', legend=None),
        tooltip=['size'],
    )
    st.write(points.interactive())
Beispiel #29
0
def facetted_histogram(df):
    """Facet of one histogram per column with cross filter interaction

    Each panel layers a grey background histogram, which carries the x
    interval selection, under a histogram filtered by that selection.
    """
    brush = alt.selection(type='interval', encodings=['x'])

    binned_x = alt.X(alt.repeat('column'),
                     type='quantitative',
                     bin=alt.Bin(maxbins=20))
    counts_y = alt.Y('count()', axis=alt.Axis(title=''))
    # The base chart has no data; it is supplied on the layer below.
    base = alt.Chart().mark_bar().encode(x=binned_x, y=counts_y)
    base = base.properties(width=200, height=150)

    background = base.encode(color=alt.value('#ddd')).add_selection(brush)
    highlight = base.transform_filter(brush)

    layered = alt.layer(background, highlight, data=df)
    return layered.repeat(column=list(df.columns))
Beispiel #30
0
def numericHistograms(data):
    """
    Generate a chart for all numeric values in Pandas DataFrame

    For all the numeric columns in a Pandas Dataframe, this function
    will generate a Histogram to show how the data is distributed.
    The histograms are stacked as rows of one repeated chart.

    Attributes
    ----------
    data : pd.DataFrame
    A Pandas DataFrame that contains numeric values
    """
    columns = data.select_dtypes(np.number).columns.tolist()

    binned_x = alt.X(alt.repeat('row'), type='quantitative', bin=True)
    counts_y = alt.Y("count()", type='quantitative')

    panel = alt.Chart(data, height=100).mark_bar().encode(x=binned_x,
                                                          y=counts_y)
    return panel.repeat(row=columns)
Beispiel #31
0
"""
Repeated Choropleth Map
=======================
Three choropleths representing disjoint data from the same table.
"""
# category: maps
import altair as alt
from vega_datasets import data

# State shapes, the table holding the three measures, and the measure names
# that drive both the lookup and the row repetition.
states = alt.topo_feature(data.us_10m.url, 'states')
source = data.population_engineers_hurricanes.url
variable_list = ['population', 'engineers', 'hurricanes']

# One map per measure, stacked as rows, each with its own color scale.
alt.Chart(states).mark_geoshape().encode(
    color=alt.Color(alt.repeat('row'), type='quantitative'),
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'id', variable_list),
).properties(width=500, height=300).project(type='albersUsa').repeat(
    row=variable_list,
).resolve_scale(color='independent')
"""
# category: interactive charts
import altair as alt
from vega_datasets import data

source = alt.UrlData(
    data.flights_2k.url,
    format={'parse': {'date': 'date'}}
)

brush = alt.selection(type='interval', encodings=['x'])

# Define the base chart, with the common parts of the
# background and highlights
base = alt.Chart().mark_bar().encode(
    x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)),
    y='count()'
).properties(
    width=180,
    height=130
)

# blue background with selection
background = base.properties(selection=brush)

# yellow highlights on the transformed data
highlight = base.encode(
    color=alt.value('goldenrod')
).transform_filter(brush)

# layer the two charts & repeat