Exemple #1
0
            color = "#81F7D8"
        if "m4" in name:
            color = "#D20338"
        if "weight" in name:
            color = "#2E2EFE"
        if "random" in name:
            color = "#FFCE54"
        for value_label in value_labels:
            # 2.绘制JSON图像
            print("# 2.绘制第%d幅%s折线图像..." % (i + 1, value_label))

            alt.Chart(json_data).mark_line().encode(
                alt.X(scale=alt.Scale(domain=[start_time, end_time]),
                      field=time_label,
                      type="temporal",
                      axis=alt.Axis(title="", labelFontSize=20)),
                alt.Y(scale=alt.Scale(domain=[min_value, max_value]),
                      field=value_label,
                      type="quantitative",
                      axis=alt.Axis(title="", labelFontSize=20)),
                alt.Color(value=color)).properties(
                    width=width, height=height).save(line_chart)
            os.system("mv *%s %s" %
                      (line_postfix, value_label + "_" + line_dir))

            print("# 2.绘制第%d幅%s面积图像..." % (i + 1, value_label))

            alt.Chart(json_data).mark_area().encode(
                alt.X(scale=alt.Scale(domain=[start_time, end_time]),
                      field=time_label,
                      type="temporal",
Exemple #2
0
# Altair plot 1: total confirmed cases and cases per million, per country.

source = final_df

# Shared x encoding and tooltip; sort=None is passed so Altair does not
# apply its default sort to the country axis.
base = (
    alt.Chart(source)
    .encode(
        x=alt.X('Country/Region:N', sort=None),
        tooltip=['Country/Region', 'confirmed', 'cases/million'],
    )
    .properties(height=500, title='Total Confirmed Cases/Country')
)

# Bars carry the absolute confirmed counts; the axis title is tinted to
# match the bar colour.
bar = base.mark_bar(color='#5276A7').encode(
    y=alt.Y('confirmed:Q', axis=alt.Axis(titleColor='#5276A7')),
)

# Red circles carry cases per million on their own axis.
point = base.mark_circle(size=60, color='red').encode(
    y=alt.Y('cases/million:Q', axis=alt.Axis(titleColor='red')),
)

# Overlay both marks; each layer keeps an independent y scale.
(bar + point).resolve_scale(y='independent')

# Altair plot 2: global confirmed / recovered / deaths over time
# (example only, not used in the web app).
source = timeseries_final

base = alt.Chart(source).encode(alt.X('date:T'))

line1 = base.mark_line(color='green').encode(alt.Y('total confirmed:Q'))

line2 = base.mark_line(color='blue').encode(alt.Y('total recovered:Q'))
line3 = base.mark_line(color='red').encode(alt.Y('total deaths:Q'))
Exemple #3
0
def result_heatmap(data, result="win", title=None,
                   width=500, height=500):
    """
    Build an Altair heatmap of a player's outcome percentage, broken down by
    the player's hand total and the dealer's up card.

    Parameters
    ----------
    data : list or pandas.DataFrame
        A player's hand history. A list is converted to a DataFrame. The
        columns "dealer_blackjack", "total" and "dealer_up" are read here;
        any further columns are consumed by `results_pct`.
    result : str
        Which outcome to colour by: "win", "loss", "push" or "surrender".
    title : str, optional
        Chart title. Defaults to "<Result> Percentage".
    width, height : int
        Chart size passed to `alt.Chart`.

    Returns
    -------
    alt.Chart
        A rect-mark heatmap with dealer up card on x and player total on y.

    Raises
    ------
    AssertionError
        If `result` is not one of the four supported outcomes.
    """
    possible_results = ["win", "loss", "push", "surrender"]
    # The message now lists every accepted value (it previously omitted
    # 'surrender' even though it is valid).
    assert result in possible_results, (
        "'result' must be 'win', 'loss', 'push', or 'surrender'"
    )
    if not title:
        title = f"{result.title()} Percentage"
    # convert data to a DataFrame if it's just a player's history list
    if isinstance(data, list):
        data = pd.DataFrame(data)

    # remove any hands where the dealer had blackjack or the player busted
    sub_data = data[(data["dealer_blackjack"] == 0) &
                    (data["total"] <= 21)].copy()
    # calculate the outcome percentages for each total / dealer up card combo
    grouped_pct = sub_data.groupby(
        ["total", "dealer_up"]
    ).apply(results_pct, as_series=False)
    # unpack the tuple returned per group into columns; the columns are
    # labelled in the order of `possible_results`
    # (presumably the order `results_pct` returns them -- confirm there)
    grouped_pct = grouped_pct.apply(pd.Series)
    grouped_pct.columns = possible_results
    # reset index and sort for plotting
    pct_data = grouped_pct.reset_index().sort_values("total", ascending=False)
    # legend ticks: every 0.1 step between the smallest and largest value
    # found across all four outcome columns, rounded to one decimal
    min_val = round(min(pct_data[possible_results].min()), 1)
    max_val = round(max(pct_data[possible_results].max()), 1)
    min_int = int(min_val * 10)
    max_int = int(max_val * 10)
    values = [
        round(x * 0.1, 1) for x in range(min_int, max_int + 1)
    ]
    # create altair heatmap
    chart = alt.Chart(
        pct_data, title=title, width=width, height=height
    ).mark_rect(binSpacing=1).encode(
        x=alt.X(
            "dealer_up:O",
            axis=alt.Axis(orient="top", labelAngle=0),
            title="Dealer Up Card"
        ),
        y=alt.Y(
            "total:O",
            title="Player Total",
            sort=alt.EncodingSortField(op="mean", order="descending")
        ),
        color=alt.Color(
            f"{result}:Q",
            legend=alt.Legend(
                title=f"{result.title()} Probability",
                values=values
            )
        ),
        tooltip=[
            alt.Tooltip("dealer_up", title="Dealer Up Card"),
            alt.Tooltip("total", title="Player Total"),
            alt.Tooltip(f"{result}", title=f"{result.title()} Probability")
        ]
    )

    return chart
Exemple #4
0
# Historical periods drawn as shaded bands behind the population line.
source2 = [
    {"start": "1933", "end": "1945", "event": "Nazi Rule"},
    {"start": "1948", "end": "1989", "event": "GDR (East Germany)"},
]

# Both record lists become DataFrames (pandas is reached through altair).
source = alt.pd.DataFrame(source)
source2 = alt.pd.DataFrame(source2)

# Population over time as a dark line ...
line = alt.Chart(source).mark_line(color="#333").encode(
    alt.X("year:T", axis=alt.Axis(format="%Y"), title="Year"),
    alt.Y("population", title="Population"),
)

# ... with a point marker on every observation (same encodings as the line).
point = line.mark_point(color="#333")

# One rectangle per period, spanning start..end on x, coloured by event.
rect = alt.Chart(source2).mark_rect().encode(
    alt.X("start:T"),
    alt.X2("end:T"),
    alt.Color("event:N", title="Event"),
)

(rect + line + point).properties(
    title="Population of Falkensee from 1875 to 2014",
    width=500,
    height=300,
)
Exemple #5
0
def precipitation_dashboard():
    """Render the precipitation dashboard page.

    Loads the ``precipitation`` and ``station`` tables from the SQLite
    database at ``db_path_precipitation``, sums ``JAM_BARAN`` per station and
    water year, and builds a folium map with one marker per station. Each
    marker's popup embeds a Vega-Lite bar chart of that station's per-year
    totals, with a red rule at their mean.

    Returns:
        The rendered ``precipitation_flask/precipitation_dashboard.html``
        template, with the map's HTML passed as ``map``.

    Raises:
        Exception: any error raised while loading or preparing the data is
            reported on stdout and then re-raised, instead of surfacing
            later as an unrelated ``NameError``.
    """
    try:
        db_precipitation = sqlite3.connect(db_path_precipitation, check_same_thread=False)
        try:
            precip_data = pd.read_sql_query(sql="SELECT * FROM precipitation", con=db_precipitation)
            stations = pd.read_sql_query(sql="SELECT * FROM station", con=db_precipitation)
        finally:
            # Both tables are fully loaded into memory; release the connection.
            db_precipitation.close()

        precip_data["wateryear"] = [
            extract_wateryear(month, year)
            for month, year in zip(precip_data["MONTH"], precip_data["YEAR"])
        ]
        yearly_totals = (
            precip_data[['stationCode', 'wateryear', 'JAM_BARAN']]
            .groupby(['stationCode', 'wateryear'])
            .sum()
            .reset_index()
        )
    except Exception:
        print("ERROR LOAD DATA FROM DATABASE")
        raise

    # Centre the map on the mean station coordinates.
    station_map = folium.Map(
        location=[stations["latDecimalDegrees"].mean(), stations["longDecimalDegrees"].mean()],
        tiles='Stamen Terrain',
        zoom_start=7
    )

    axis_style = dict(
        labelFontSize=16,
        labelFont="B Zar",
        titleFont="B Zar",
        titleFontSize=16,
    )

    for station in stations.itertuples(index=False):
        data_st = yearly_totals[yearly_totals["stationCode"] == station.stationCode]

        # Per-water-year totals for this station.
        bar = alt.Chart(data_st, width=600).mark_bar().encode(
            x=alt.X('wateryear:O', axis=alt.Axis(title='سال آبی')),
            y=alt.Y('JAM_BARAN:Q', axis=alt.Axis(title='بارندگی - میلیمتر'))
        )

        # Horizontal rule at the station's mean total.
        rule = alt.Chart(data_st).mark_rule(color='red').encode(
            y='mean(JAM_BARAN):Q'
        )

        # NOTE: a former `chart.configure_title(align="left")` statement was
        # dropped here. configure_* returns a new chart and the result was
        # discarded, so it never affected the output.
        chart = (
            (bar + rule)
            .properties(title=station.stationName)
            .configure_axisY(**axis_style)
            .configure_axisX(**axis_style)
            .configure_title(
                fontSize=20,
                font="B Titr",
            )
        )

        # folium's VegaLite element takes the chart as a JSON spec.
        vis = chart.to_json()

        folium.Marker(
            location=[station.latDecimalDegrees, station.longDecimalDegrees],
            popup=folium.Popup(max_width=700).add_child(folium.features.VegaLite(vis, width=700, height=300)),
            tooltip="کلیک کنید"
        ).add_to(station_map)

    return render_template(
        template_name_or_list='precipitation_flask/precipitation_dashboard.html',
        map=station_map._repr_html_()
    )
Exemple #6
0
def confirmed():
    """Render the reported-cases chart to ``graphs/confirmed.png``.

    Reads ``data/confirmed.csv``, keeps the rows whose ``source`` is
    ``"fhi:git"``, and draws three layers over the date axis: bars for new
    cases per day, a line for the 7-row rolling mean of new cases (shifted
    by one row), and a line for the cumulative total on an independent
    y scale. Any existing output file is removed first.
    """
    data = "data/confirmed.csv"
    filename = "graphs/confirmed.png"
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])
    # Copy the filtered rows so the column assignment below acts on an
    # independent frame (no SettingWithCopyWarning on a slice).
    df = df.loc[df["source"] == "fhi:git"].copy()
    df["new_sma7"] = df.new.rolling(window=7).mean().shift()

    # Long format: one row per (date, category); rows without a value
    # (e.g. the first rows of the rolling mean) are dropped.
    df = df.melt(
        id_vars=["date"],
        value_vars=["new", "new_sma7", "total"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {"new": "New cases", "new_sma7": "Avg 7 d.", "total": "Cumulative"}

    df["category"] = df["category"].replace(rename)

    base = alt.Chart(
        df,
        title="Number of reported COVID-19 cases by specimen collection date (Source: FHI)",
    ).encode(alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)))

    bar = (
        base.transform_filter(alt.datum.category == "New cases")
        .mark_bar(color="#FFD1D1")
        .encode(y=alt.Y("value:Q", axis=alt.Axis(title="New per day", grid=True)))
    )

    # The cumulative line declares the colour scale for all three
    # categories, so the legend lists bars and both lines.
    line = (
        base.transform_filter(alt.datum.category == "Cumulative")
        .mark_line(color="#2E507B", strokeWidth=3)
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New cases", "Avg 7 d.", "Cumulative"],
                    range=["#FFD1D1", "red", "#2E507B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    ma7 = (
        base.transform_filter(alt.datum.category == "Avg 7 d.")
        .mark_line(opacity=0.8)
        .encode(y=alt.Y("value:Q"), color=alt.Color("category:N"))
    )

    # Bars and moving average share one y scale; the cumulative line gets
    # its own.
    chart = (
        alt.layer(bar + ma7, line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
np.random.seed(42)

# 100 normally distributed samples (mean 50, sd 15), truncated to integers
# and kept as strings so they can be split by character.
df = pd.DataFrame(
    {'samples': np.random.normal(50, 15, 100).astype(int).astype(str)}
)

# Stem = everything but the last character, leaf = the last character.
df = df.assign(
    stem=df['samples'].str[:-1],
    leaf=df['samples'].str[-1],
)

df = df.sort_values(by=['stem', 'leaf'])

# 1-based position of each leaf within its stem row.
df['position'] = df.groupby('stem').cumcount() + 1

# One text mark per leaf, laid out by stem (y) and position (x).
alt.Chart(df).mark_text(
    align='left',
    baseline='middle',
    dx=-5,
).encode(
    x=alt.X(
        'position:Q',
        axis=alt.Axis(title='', ticks=False, labels=False, grid=False),
    ),
    y=alt.Y('stem:N', axis=alt.Axis(title='', tickSize=0)),
    text='leaf:N',
).configure_axis(
    labelFontSize=20,
).configure_text(
    fontSize=20,
)
Exemple #8
0
def show_visualization():
    """Render the mask-wearing vs. symptom views in the Streamlit app.

    Loads ``fb_mask.csv`` and ``fb_sympton.csv`` via ``load_data``, joins
    them on date and state, and shows:

    1. a scatter plot with a regression line of symptom percentage against
       mask percentage, for the states and date range picked in the sidebar;
    2. two US choropleth maps (one per variable) for the first selected date.
    """
    # Work on copies so the frames returned by load_data are not mutated.
    fb_mask_original = load_data("fb_mask.csv")
    fb_sympton_original = load_data("fb_sympton.csv")
    fb_sympton = copy.deepcopy(fb_sympton_original)
    fb_mask = copy.deepcopy(fb_mask_original)
    fb_mask['time_value'] = pd.to_datetime(fb_mask['time_value'],
                                           format='%Y/%m/%d')
    fb_sympton['time_value'] = pd.to_datetime(fb_sympton['time_value'],
                                              format='%Y/%m/%d')
    fb_mask.rename(columns={'value': 'mask_percentage'}, inplace=True)
    fb_sympton.rename(columns={'value': 'sympton_percentage'}, inplace=True)

    fb_all = fb_mask.merge(fb_sympton, on=['time_value', 'geo_value'])
    fb_all = fb_all[[
        'geo_value', 'time_value', 'mask_percentage', 'sympton_percentage'
    ]]
    fb_all = fb_all[fb_all['time_value'] > '2020-09-08']

    state_codes = fb_all.geo_value.str.upper().unique()

    # Computed once; used for both the widget label and its default range.
    first_day = fb_all['time_value'].min()
    last_day = fb_all['time_value'].max()

    # first plot: correlation between wearing mask and having symptoms
    st.title(
        "Let`s see the correlation between wearing mask and having symptons.")

    state_choice = st.sidebar.multiselect(
        "Which state are you interested in?",
        state_codes.tolist(),
        default=['AK', 'AL', 'AR', 'AZ', 'CA', 'CO'])

    date_range = st.sidebar.date_input(
        "Which range of date are you interested in? Choose between %s and %s" %
        (first_day.strftime('%Y/%m/%d'), last_day.strftime('%Y/%m/%d')),
        [first_day, last_day])

    fb_temp = fb_all[fb_all['geo_value'].str.upper().isin(state_choice)]

    # Only the start date is available while the user is still picking the
    # end date; in that case filter from the start date onwards.
    if len(date_range) == 2:
        fb_selected = fb_temp[
            (fb_temp['time_value'] >= pd.to_datetime(date_range[0]))
            & (fb_temp['time_value'] <= pd.to_datetime(date_range[1]))]
    else:
        fb_selected = fb_temp[(fb_temp['time_value'] >= pd.to_datetime(
            date_range[0]))]

    scatter_chart = alt.Chart(fb_selected).mark_circle().encode(
        x=alt.X('mask_percentage',
                scale=alt.Scale(zero=False),
                axis=alt.Axis(title='percentage of wearing masks')),
        y=alt.Y('sympton_percentage',
                scale=alt.Scale(zero=False),
                axis=alt.Axis(title='percentage of having covid symptons')))
    regression_line = scatter_chart.transform_regression(
        'mask_percentage', 'sympton_percentage').mark_line()
    # Written explicitly: a bare expression is only displayed when Streamlit
    # "magic" is active for this file.
    st.write(scatter_chart + regression_line)

    map_data = fb_all[fb_all['time_value'] == pd.to_datetime(
        date_range[0])].copy()
    # Numeric ids used to join rows onto the map features.
    # NOTE(review): assigned positionally, so this assumes exactly one row
    # per id, in this order, for the chosen date -- confirm against the data.
    ids = [
        2, 1, 5, 4, 6, 8, 9, 11, 10, 12, 13, 15, 19, 16, 17, 18, 20, 21, 22,
        25, 24, 23, 26, 27, 29, 28, 30, 37, 38, 31, 33, 34, 35, 32, 36, 39, 40,
        41, 42, 44, 45, 46, 47, 48, 49, 51, 50, 53, 55, 54, 56
    ]
    map_data['id'] = ids

    us_states = alt.topo_feature(data.us_10m.url, 'states')
    variable_list = ['mask_percentage', 'sympton_percentage']

    # One map per variable (stacked in rows), each with its own colour scale.
    chart = alt.Chart(us_states).mark_geoshape().encode(
        alt.Color(alt.repeat('row'), type='quantitative')).transform_lookup(
            lookup='id',
            from_=alt.LookupData(map_data, 'id', variable_list)).properties(
                width=500, height=300).project(type='albersUsa').repeat(
                    row=variable_list).resolve_scale(color='independent')

    st.write(chart)
Exemple #9
0
}])
# Green when the close is above the open, red otherwise.
open_close_color = alt.condition(
    "datum.open < datum.close",
    alt.value("#06982d"),
    alt.value("#ae1325"),
)

# Thin rule spanning each day's low..high range.
rule = alt.Chart(source).mark_rule().encode(
    x=alt.X(
        'yearmonthdate(date):T',
        scale=alt.Scale(domain=[
            {"month": 5, "date": 31, "year": 2009},
            {"month": 7, "date": 1, "year": 2009},
        ]),
        axis=alt.Axis(format='%m/%d', title='Date in 2009'),
    ),
    y=alt.Y('low', title='Price', scale=alt.Scale(zero=False)),
    y2=alt.Y2('high'),
    color=open_close_color,
)

# Bar spanning each day's open..close range.
bar = alt.Chart(source).mark_bar().encode(
    alt.X('yearmonthdate(date):T'),
    alt.Y('open'),
    alt.Y2('close'),
    color=open_close_color,
)

rule + bar
Exemple #10
0
def main():
    """Streamlit entry point for the doctorate-statistics pages.

    Loads the three data sets, lets the user pick a page in the sidebar and
    renders the matching bar chart:

    * "Table 4": totals per field, or recipients per institution for one
      chosen field.
    * "Table 6": values by sex for a chosen field and state/location.
    """
    df1 = load_data_1()
    df2 = load_data_2()
    df3 = load_data_3()
    page = st.sidebar.selectbox("Choose a page", ["Table 4", "Table 6"])

    def big_axis(**extra):
        # Every axis in this app uses the same enlarged label/title fonts.
        return alt.Axis(labelFontSize=15, titleFontSize=20, **extra)

    if page == "Table 6":
        st.header(
            "Doctorates awarded, by state or location, broad field of study, and sex of doctorate recipients: 2017"
        )
        field = st.selectbox("Choose a field", list(np.unique(df3['field'])))
        by_field = df3.loc[df3['field'] == field]
        state = st.selectbox("Choose a state/location",
                             list(np.unique(by_field['State or location'])))
        by_state = by_field.loc[by_field["State or location"] == state]
        graph = alt.Chart(by_state).mark_bar().encode(
            x=alt.X("sex", sort=None, axis=big_axis()),
            y=alt.Y("value", sort=None, axis=big_axis()),
            tooltip=["field", "sex", "value"],
        ).interactive()
        st.altair_chart(graph, use_container_width=True)
        st.write(
            "Note: For State Not Available in the options, it's either 0 or confidential."
        )
        return

    if page != "Table 4":
        return

    st.header(
        "Top 20 doctorate-granting institutions ranked by number of doctorate recipients, by broad field of study: 2017"
    )
    mode = st.selectbox("Total or By Field", ['Total', 'By Field'])

    if mode == 'Total':
        # One bar per field, coloured by field.
        graph = alt.Chart(df1).mark_bar().encode(
            x=alt.X('field', sort=None, axis=big_axis()),
            y=alt.Y('total', sort=None, axis=big_axis()),
            color='field',
            tooltip=["field", "total"],
        ).interactive()
        st.altair_chart(graph, use_container_width=True)
    elif mode == 'By Field':
        field = st.selectbox("Choose a field",
                             list(np.unique(df2['field'])))
        x_col = "Field and institution"
        y_col = "Doctorate recipients"
        selected = df2.loc[df2['field'] == field]
        graph = alt.Chart(selected).mark_bar().encode(
            x=alt.X(x_col, sort=None, axis=big_axis()),
            y=alt.Y(y_col, sort=None, axis=big_axis(title="Number")),
            tooltip=[x_col, y_col],
        ).interactive()
        st.altair_chart(graph, use_container_width=True)
Exemple #11
0
def main(opt):
    """Create the exploratory charts for the wine-quality training set.

    Reads the CSV named by ``opt["--preprocessed_train"]`` and saves two
    charts: a count-per-quality bar chart to ``opt["--quality_count_path"]``
    and a three-column grid of mean-value bar charts with error bars (one
    per explanatory variable) to ``opt["--quality_all_variables_path"]``.

    Exits with status 1 if the input CSV does not exist.
    """
    train_path = opt["--preprocessed_train"]

    # Read in data
    try:
        wine_train = pd.read_csv(train_path)
    except FileNotFoundError:
        print("Input csv file of train set does not exist.")
        sys.exit(1)

    # Create visualizations
    wine_train["quality"] = wine_train["quality"].astype("category")
    alt.data_transformers.disable_max_rows()

    ## Distribution of outcome variable
    quality_count_path = opt["--quality_count_path"]

    quality_axis = alt.X(
        "quality:O",
        type="quantitative",
        title="Quality",
        axis=alt.Axis(format=".0f"),
    )
    grade_color = alt.Color(
        "quality",
        title="Wine Grade",
        scale=alt.Scale(scheme="viridis"),
    )
    count_chart = (
        alt.Chart(wine_train)
        .mark_bar(size=40)
        .encode(x=quality_axis, y=alt.Y("count()"), color=grade_color)
        .properties(
            width=400,
            title="Histogram: Number of Wines by Quality Class",
        )
    )

    count_chart.save(quality_count_path)

    ## Create repeated charts for all 11 explanatory variables
    quality_all_variables_path = opt["--quality_all_variables_path"]

    # Same data with display-friendly column names:
    # "fixed_acidity" -> "Fixed Acidity".
    wine_train_for_plotting = wine_train.copy()
    wine_train_for_plotting.columns = [
        " ".join(word.capitalize() for word in column.split("_"))
        for column in wine_train.columns
    ]

    explanatory_variables = [
        "Fixed Acidity",
        "Volatile Acidity",
        "Citric Acid",
        "Residual Sugar",
        "Chlorides",
        "Free Sulfur Dioxide",
        "Total Sulfur Dioxide",
        "Density",
        "Ph",
        "Sulphates",
        "Alcohol",
    ]

    # Mean of the repeated variable per quality class ...
    bar = (
        alt.Chart(wine_train_for_plotting)
        .mark_bar()
        .encode(
            x=alt.X("Quality", title="Quality"),
            y=alt.Y(
                alt.repeat("row"),
                type="quantitative",
                aggregate="mean",
                scale=alt.Scale(zero=False),
            ),
            color=alt.Color(
                "Quality",
                title="Wine Grade",
                scale=alt.Scale(scheme="viridis"),
            ),
        )
        .properties(width=400, height=300)
    )

    # ... overlaid with an error bar for the same variable.
    error = (
        alt.Chart(wine_train_for_plotting)
        .mark_errorbar()
        .encode(
            x=alt.X("Quality"),
            y=alt.Y(
                alt.repeat("row"),
                type="quantitative",
                scale=alt.Scale(zero=False),
            ),
        )
    )

    # Three side-by-side columns holding 4, 4 and 3 variables.
    variable_groups = (
        explanatory_variables[:4],
        explanatory_variables[4:8],
        explanatory_variables[8:],
    )
    left, middle, right = [
        (bar + error).repeat(row=group) for group in variable_groups
    ]

    quality_all_variables = left | middle | right

    quality_all_variables.save(quality_all_variables_path)
Exemple #12
0
"""
Becker's Barley Trellis Plot
----------------------------
This example demonstrates the trellis charts created by Richard Becker, William Cleveland and others in the 1990s. Using the visualization technique below they identified an anomaly in a widely used agricultural dataset, which they termed ["The Morris Mistake."](https://www.albany.edu/acc/courses/acc522fall2007/lecturenotes/trellis.usermanual.pdf). It became their favored way of showcasing the power of this pioneering plot.
"""
# category: case studies
import altair as alt
from vega_datasets import data

source = data.barley()

# One row (facet) per site; within each facet, a point per variety and year.
# Both the variety axis and the site facets are sorted by summed yield,
# descending.
alt.Chart(source, title="The Morris Mistake").mark_point().encode(
    x=alt.X(
        'yield:Q',
        scale=alt.Scale(zero=False),
        axis=alt.Axis(grid=False, title="Barley Yield (bushels/acre)"),
    ),
    y=alt.Y(
        'variety:N',
        sort=alt.EncodingSortField(
            field='yield', op='sum', order='descending'
        ),
        scale=alt.Scale(rangeStep=20),
        axis=alt.Axis(title="", grid=True),
    ),
    color=alt.Color('year:N', legend=alt.Legend(title="Year")),
    row=alt.Row(
        'site:N',
        title="",
        sort=alt.EncodingSortField(
            field='yield', op='sum', order='descending'
        ),
    ),
).configure_view(stroke="transparent")
Exemple #13
0
# Measles incidence table; '-' marks missing values and the first two
# lines of the file are skipped.
url = 'https://raw.githubusercontent.com/blmoore/blogR/master/data/measles_incidence.csv'
data = pd.read_csv(url, skiprows=2, na_values='-')
data.head()

# Collapse the weekly rows into one total per year.
annual = data.drop(columns='WEEK').groupby('YEAR').sum()
annual.head()

# Long format: one row per (YEAR, state) with its incidence.
measles = annual.reset_index().melt(
    id_vars='YEAR',
    var_name='state',
    value_name='incidence',
)
measles.head()

# First pass: heatmap with the default colour scale.
alt.Chart(measles).mark_rect().encode(
    alt.X('YEAR:O'),
    alt.Y('state:N'),
    alt.Color('incidence'),
).properties(width=600, height=400)

# Custom colormap built from hex codes and HTML colour names,
# applied on a square-root scale.
colormap = alt.Scale(
    domain=[0, 100, 200, 300, 1000, 3000],
    range=[
        '#F0F8FF', 'cornflowerblue', 'mediumseagreen',
        '#FFEE00', 'darkorange', 'firebrick',
    ],
    type='sqrt',
)

# Second pass: same heatmap with the custom colormap, no axis titles,
# no ticks and no legend.
alt.Chart(measles).mark_rect().encode(
    x=alt.X('YEAR:O', axis=alt.Axis(title=None, ticks=False)),
    y=alt.Y('state:N', axis=alt.Axis(title=None, ticks=False)),
    color=alt.Color(
        'incidence:Q', sort='ascending', scale=colormap, legend=None
    ),
).properties(width=800, height=500)
#%%
Exemple #14
0
    def _make_manual_legend(self, df, click_selection):
        """Build a hand-drawn, clickable legend as a layered Altair chart.

        The legend is a small chart of its own: one diamond marker plus a
        text label per group, and a title row above them. `click_selection`
        is attached to an invisible text layer, and a separate hover
        selection ('legend_hover') is attached to an invisible point layer.

        Args:
            df: data frame holding at least the `self.colorby` column and a
                'group_idx' column.
            click_selection: Altair selection to attach to the legend.

        Returns:
            An `alt.LayerChart` combining all legend layers.

        Raises:
            ValueError: if there are more groups than `self.MAX_LEGEND_MARKS`.
        """
        # One representative row per group (the first), sorted by group value.
        groups = df.groupby(self.colorby).first().reset_index().sort_values(
            self.colorby, ascending=True)
        group_names = list(groups[self.colorby].values)
        if len(group_names) > self.MAX_LEGEND_MARKS:
            raise ValueError(
                f'max {self.MAX_LEGEND_MARKS} supported for now ({len(group_names)} requested)'
            )
        # y positions for the group rows: MAX+1, MAX, MAX-1, ... (descending).
        idx = list(self.MAX_LEGEND_MARKS + 1 - np.arange(len(group_names)))
        row_type = ['normal'] * len(idx)
        # Extra 'title' row placed one step above the first group row; it is
        # given group_idx -1 below.
        idx.append(self.MAX_LEGEND_MARKS + 2)
        row_type.append('title')
        # NOTE(review): self.get(...) presumably returns a configured display
        # name with "line" as the fallback -- confirm against its definition.
        group_names.append(f'Select {self.get("readable_group_name", "line")}')
        xs = np.zeros_like(idx)
        # NOTE(review): grouping above uses self.colorby while the column
        # here is keyed by self._colorby -- presumably the same field name;
        # verify.
        leg_df = pd.DataFrame({
            'idx': idx,
            'group_idx': list(groups['group_idx']) + [-1],
            self._colorby: group_names,
            'x': list(xs),
            'row_type': row_type,
        })

        # Axis stripped of domain line, ticks, grid and labels; shared by x
        # and y.
        axis = alt.Axis(domain=False,
                        ticks=False,
                        orient='right',
                        grid=False,
                        labels=False)
        base = alt.Chart(
            leg_df,
            height=self._height,
            width=100,
        )

        def _make_base(base, **extra_kwargs):
            # Common positional/colour/detail encodings for every legend
            # layer; extra encodings can be passed through as keyword
            # arguments.
            # NOTE(review): the y domain is (0, MAX_LEGEND_MARKS) while idx
            # values go up to MAX_LEGEND_MARKS + 2 -- confirm this is
            # intended.
            return base.encode(
                x=alt.X('x:Q',
                        title='',
                        axis=axis,
                        scale=alt.Scale(domain=(-5, 20))),
                y=alt.Y('idx:Q',
                        title='',
                        axis=axis,
                        scale=alt.Scale(domain=(0, self.MAX_LEGEND_MARKS))),
                color=self._alt_color,
                detail=self._alt_detail,
                **extra_kwargs)

        # Diamond markers for the group rows only; opacity is 1 or 0.4
        # depending on the click-focus condition.
        legend_points = _make_base(base,
                                   opacity=alt.condition(
                                       self._click_focused_or_none_selected(),
                                       alt.value(1),
                                       alt.value(0.4),
                                   )).mark_point(shape='diamond',
                                                 filled=True,
                                                 size=160)
        legend_points = legend_points.transform_filter(
            'datum.row_type == "normal"')
        # Hover selection over legend rows, keyed on group_idx; set on
        # mouseover, cleared on mouseout, and empty='none'.
        cursor = alt.selection_single(name='legend_hover',
                                      nearest=True,
                                      on='mouseover',
                                      clear='mouseout',
                                      fields=['group_idx'],
                                      empty='none')
        layers = [
            # 1. visible diamond markers
            legend_points,
            # 2. fully transparent text (group name padded with "__") that
            #    carries the click selection
            legend_points.
            mark_text(  # fake layer to add the click selection to
                align='left', ).encode(
                    text=f'padded_text:N',
                    opacity=alt.value(0),
                ).transform_calculate(
                    padded_text=f'"__" + datum.{self._colorby} + "__"').
            add_selection(click_selection),
            # 3. zero-size points over all rows that carry the hover
            #    selection
            _make_base(base).mark_point(size=0).add_selection(cursor),
            # 4. visible group labels in black; opacity is 1 or 0.4
            #    depending on the focus condition
            legend_points.mark_text(
                align='left',
                dx=10,
                font=self._font,
            ).encode(
                text=f'{self._colorby}:N',
                color=alt.value('black'),
                opacity=alt.condition(
                    self._in_focus_or_none_selected(),
                    alt.value(1),
                    alt.value(0.4),
                ),
            ),
            # 5. the legend title row
            _make_base(base).mark_text(
                align='left',
                dx=-10,
                dy=-5,
                font=self._font,
                fontSize=16,
            ).encode(
                text=f'{self._colorby}:N',
                color=alt.value('black'),
            ).transform_filter('datum.row_type == "title"')
        ]
        # strokeOpacity=0 hides the view border around the legend chart.
        return alt.layer(*layers, view=alt.ViewConfig(strokeOpacity=0))
Exemple #15
0
def vaccine_doses():
    """Render the cumulative vaccine-dose chart to ``graphs/vaccine_doses.png``.

    Reads ``data/vaccine_doses.csv``, keeps the rows whose
    ``granularity_geo`` is ``"nation"`` and draws one unstacked,
    semi-transparent area (with outline) per dose number over the date axis.
    Any existing output file is removed first.
    """
    data = "data/vaccine_doses.csv"
    filename = "graphs/vaccine_doses.png"
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])
    df = df[df["granularity_geo"] == "nation"]
    # The former "new_sma7" rolling-mean column was never part of the melt
    # below, so it had no effect on the chart and is no longer computed.

    # Long format: one row per (date, dose category); missing values dropped.
    df = df.melt(
        id_vars=["date"],
        value_vars=["total_dose_1", "total_dose_2", "total_dose_3"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {
        "total_dose_1": "Dose 1",
        "total_dose_2": "Dose 2",
        "total_dose_3": "Dose 3",
    }

    df["category"] = df["category"].replace(rename)

    chart = (
        alt.Chart(
            df,
            title="Number of people who received their first, second and third dose of a COVID-19 vaccine in Norway (Source: FHI)",
        )
        .mark_area(line={}, opacity=0.3)
        .encode(
            x=alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)),
            y=alt.Y(
                "value:Q",
                stack=None,  # overlay the areas instead of stacking them
                title="Number of people",
            ),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=[
                        "Dose 1",
                        "Dose 2",
                        "Dose 3",
                    ],
                    # "#2ecc71" previously carried a stray leading space.
                    range=["#5dade2", "#2ecc71", "#006600"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=380,
            legendY=660,
        )
    )

    chart.save(filename)
Exemple #16
0
    def plot_interactive(
        self,
        x_axis: Union[str, Embedding],
        y_axis: Union[str, Embedding],
        annot: bool = True,
        show_axis_point: bool = False,
        color: Union[None, str] = None,
    ):
        """
        Makes highly interactive plot of the set of embeddings.

        Each embedding in the set is compared against `x_axis` and `y_axis`
        (via `compare_against`) and drawn as a circle at the resulting
        coordinates. The returned Altair chart supports pan/zoom and shows
        the embedding's name and original text in a tooltip.

        Arguments:
            x_axis: the x-axis to be used, must be given when dim > 2
            y_axis: the y-axis to be used, must be given when dim > 2
            annot: drawn points should be annotated
            show_axis_point: ensure that the axis are drawn
            color: a property that will be used for plotting

        Returns:
            An Altair chart; a layered chart (points + text) when `annot`
            is true.

        **Usage**

        ```python
        from whatlies.language import SpacyLanguage

        words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
                 "cousin", "neice", "king", "queen", "dude", "guy", "gal", "fire",
                 "dog", "cat", "mouse", "red", "bluee", "green", "yellow", "water",
                 "person", "family", "brother", "sister"]

        lang = SpacyLanguage("en_core_web_md")
        emb = lang[words]

        emb.plot_interactive('man', 'woman')
        ```
        """
        # Axis names are looked up in this set.
        if isinstance(x_axis, str):
            x_axis = self[x_axis]
        if isinstance(y_axis, str):
            y_axis = self[y_axis]

        plot_df = pd.DataFrame({
            "x_axis":
            self.compare_against(x_axis),
            "y_axis":
            self.compare_against(y_axis),
            "name": [v.name for v in self.embeddings.values()],
            "original": [v.orig for v in self.embeddings.values()],
        })

        if color:
            # Embeddings lacking the attribute get an empty string.
            plot_df[color] = [
                getattr(v, color, '') for v in self.embeddings.values()
            ]

        if not show_axis_point:
            # Drop the rows for the two axis embeddings themselves.
            plot_df = plot_df.loc[
                lambda d: ~d["name"].isin([x_axis.name, y_axis.name])]

        # The y channel is built with alt.Y (it was previously built with
        # alt.X).
        result = (alt.Chart(plot_df).mark_circle(size=60).encode(
            x=alt.X("x_axis", axis=alt.Axis(title=x_axis.name)),
            y=alt.Y("y_axis", axis=alt.Axis(title=y_axis.name)),
            tooltip=["name", "original"],
            color=alt.Color(":N", legend=None)
            if not color else alt.Color(color),
        ).properties(title=f"{x_axis.name} vs. {y_axis.name}").interactive())

        if annot:
            # Text labels showing each embedding's original text, offset
            # from its point.
            text = (alt.Chart(plot_df).mark_text(
                dx=-15, dy=3, color="black").encode(
                    x=alt.X("x_axis", axis=alt.Axis(title=x_axis.name)),
                    y=alt.Y("y_axis", axis=alt.Axis(title=y_axis.name)),
                    text="original",
                ))
            result = result + text
        return result
Exemple #17
0
def tested_lab():
    """
    Render the lab-testing overview chart to ``graphs/tested_lab.png``.

    Reads ``data/tested_lab.csv``, renames its columns to display labels,
    adds a "Share Negative" column, reshapes the table to long form and
    layers two marks on independent y scales: bars for the
    "New (Negative)" / "New (Positive)" categories and a red line for
    "Share Positive".  An already existing output file is removed first.
    """
    source_csv = "data/tested_lab.csv"
    target_png = "graphs/tested_lab.png"

    # Remove the previous render before producing a new one.
    if os.path.exists(target_png):
        os.remove(target_png)

    display_names = {
        "new_neg": "New (Negative)",
        "new_pos": "New (Positive)",
        "new_total": "New",
        "pr100_pos": "Share Positive",
        "total": "Cumulative",
    }
    table = pd.read_csv(source_csv).rename(columns=display_names)
    table["date"] = pd.to_datetime(table["date"])
    table["Share Negative"] = 100 - table["Share Positive"]

    # Long form: one row per (date, category); "Share Positive" is kept as an
    # id column so the line layer can read it directly.
    long_table = pd.melt(
        table,
        id_vars=["date", "Share Positive"],
        var_name="category",
        value_name="value",
    )

    day_axis = alt.X(
        "yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)
    )
    base = alt.Chart(
        long_table,
        title="Number of tested persons per specimen collection date and number of positive results (Source: FHI)",
    ).encode(day_axis)

    share_line = base.mark_line(color="red", opacity=0.8).encode(
        y=alt.Y("Share Positive:Q", title="% Positive", axis=alt.Axis(grid=True))
    )

    # Only the two daily-count categories are drawn as bars.
    is_daily_count = (alt.datum.category == "New (Negative)") | (
        alt.datum.category == "New (Positive)"
    )
    palette = alt.Scale(
        domain=["New (Positive)", "New (Negative)", "% Positive"],
        range=["#FF9622", "#6DA9FF", "red"],
    )
    count_bars = (
        base.transform_filter(is_daily_count)
        .mark_bar()
        .encode(
            y=alt.Y("value:Q", title="Number of persons"),
            color=alt.Color(
                "category:N", scale=palette, legend=alt.Legend(title=None)
            ),
        )
    )

    legend_style = dict(
        strokeColor="gray",
        fillColor="#FFFFFF",
        labelFontSize=12,
        symbolStrokeWidth=2,
        symbolSize=160,
        padding=6,
        cornerRadius=5,
        direction="horizontal",
        orient="none",
        legendX=480,
        legendY=655,
    )
    layered = (
        alt.layer(count_bars, share_line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
    )
    layered.configure_legend(**legend_style).save(target_png)
Exemple #18
0
# Script section: plot price history per stock and shade the forecast period.
# Relies on `chart_data`, `today`, `start_date`, `end_date`, `alt` and `st`
# being defined earlier in the file (not visible in this section).

# Add helper columns for plotting. `max_price` is computed before the date
# filter below, so it is the maximum over every row present at this point.
chart_data["today"] = today
chart_data["max_price"] = chart_data["price"].max()

# Keep rows with start_date < date <= end_date (the start date itself is
# excluded by the strict comparison).
date_mask = (chart_data['date'].dt.date >
             start_date) & (chart_data['date'].dt.date <= end_date)
chart_data = chart_data[date_mask]

# Separate frame with the rows dated today or later, used to highlight the
# forecast area.
pred_data = chart_data[chart_data["date"].dt.date >= today]

# Build the charts.
base = alt.Chart(chart_data, height=500, width=700)

# One price line per stock.
line = base.mark_line().encode(x='date:T',
                               y=alt.Y('price:Q',
                                       axis=alt.Axis(title='price')),
                               color='stock:N')
# NOTE(review): the bare string below is an expression statement; presumably
# it is rendered on the page by Streamlit "magic" -- confirm before editing.
'''
The forecast made by model is under the lightblue area.
'''

# Semi-transparent area as tall as `max_price`, spanning the forecast dates.
band = alt.Chart(pred_data, height=500, width=700).mark_area(
    opacity=0.5, color='lightblue').encode(x='date',
                                           y=alt.Y('max_price',
                                                   axis=alt.Axis(title='')))

# Layer the shaded band over the price lines and hand the result to Streamlit.
st.altair_chart(line + band)
'''The data is gathered from Yahoo Finance API.'''
Exemple #19
0
"""
Line Chart with Percent axis
----------------------------
This example shows how to set an axis as a percent.
"""

import altair as alt
from altair.expr import datum
from vega_datasets import data

# The chart loads the jobs dataset by URL instead of embedding the rows.
source = data.jobs.url

# Line per sex of the `perc` field over the years, restricted to the
# "Welder" job, with the y axis formatted as a percentage.
chart = (
    alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('year:O'),
        y=alt.Y('perc:Q', axis=alt.Axis(format='%')),
        color='sex:N',
    )
    .properties(title='Percent of work-force working as Welders')
    .transform_filter(datum.job == 'Welder')
)
Exemple #20
0
    "x": 14,
    "y": 66
}, {
    "x": 15,
    "y": 17
}, {
    "x": 16,
    "y": 27
}, {
    "x": 17,
    "y": 68
}, {
    "x": 18,
    "y": 16
}, {
    "x": 19,
    "y": 49
}, {
    "x": 20,
    "y": 15
}])

# First band: the raw `y` values, clipped to the 0-50 domain.
area1 = (
    alt.Chart(df)
    .mark_area(clip=True, interpolate='monotone')
    .encode(
        x=alt.X('x', scale=alt.Scale(zero=False, nice=False)),
        y=alt.Y('y', scale=alt.Scale(domain=[0, 50]), axis=alt.Axis(title='y')),
        opacity=alt.value(0.6),
    )
    .properties(width=500, height=75)
)

# Second band: same chart, but plotting `ny = y - 50` so that the part of the
# series above 50 is folded back into the same 0-50 window.
area2 = area1.encode(y='ny:Q').transform_calculate(
    as_="ny", calculate=datum.y - 50)

# Overlay both bands (bare expression: displayed when run in a notebook).
area1 + area2
    print('\n')
# Bar charts of value counts for the identity features id_12 .. id_38 of the
# `train` frame (defined earlier in the file, not visible here).
charts = {}
info = ['id_' + str(i) for i in range(12, 39)]

for i in info:
    # The three high-cardinality features get a wider chart.
    width_len = 600 if i in ('id_30', 'id_31', 'id_33') else 400
    # `rename_axis(...).reset_index(name=...)` yields the columns [i, 'count']
    # on every pandas version.  The previous reset_index()+rename() relied on
    # the pre-2.0 column names and produced two 'count' columns on pandas>=2.
    feature_count = (train[i].value_counts(dropna=False)
                     .rename_axis(i)
                     .reset_index(name='count'))
    chart = alt.Chart(feature_count).mark_bar().encode(
        y=alt.Y(f"{i}:N", axis=alt.Axis(title=i)),
        x=alt.X('count:Q', axis=alt.Axis(title='Count')),
        tooltip=[i, 'count']).properties(title=f"Counts of {i}",
                                         width=width_len)
    charts[i] = chart

# Vertical-bar variant limited to the 40 most frequent values.
# NOTE(review): as in the original, `chart` is rebound on every iteration and
# is not stored in `charts` -- confirm whether that is intended.
for i in ['id_30', 'id_31', 'id_33']:
    feature_count = (train[i].value_counts(dropna=False)
                     .head(40)
                     .rename_axis(i)
                     .reset_index(name='count'))
    chart = alt.Chart(feature_count).mark_bar().encode(
        x=alt.X(f"{i}:N", axis=alt.Axis(title=i)),
        y=alt.Y('count:Q', axis=alt.Axis(title='Count')),
        tooltip=[i, 'count']).properties(title=f"Counts of {i}", width=800)
"""
Calculating Percentage of Total
-------------------------------
This chart demonstrates how to use a window transform to display data values
as a percentage of total values.
"""
# category: bar charts
import altair as alt
import pandas as pd

# Hours spent per activity.
activities = pd.DataFrame({
    'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'],
    'Time': [8, 2, 4, 8, 2]
})

# The window transform with an unbounded frame puts the grand total of `Time`
# on every row; the calculate transform then derives each row's share.
(
    alt.Chart(activities)
    .mark_bar()
    .encode(
        x=alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')),
        y='Activity:N',
    )
    .transform_window(TotalTime='sum(Time)', frame=[None, None])
    .transform_calculate(PercentOfTotal="datum.Time / datum.TotalTime")
)
Exemple #23
0
    def build_graph(self):
        """
        Draw a grouped, stacked bar chart of the top languages per quarter.

        ``self.yearly_data`` is read as ``{year: {quarter: {language: value}}}``
        with quarters numbered from 1 to 4.  For every quarter the (at most
        five) languages with the highest non-zero values are stored under the
        quarter's ``'top'`` key; the chart stacks those values per quarter,
        grouped in one column per year and coloured per language using
        ``colors.json`` located next to this module.

        Returns:
            The file name of the saved chart, ``'bar_graph.png'``.
        """
        with open(os.path.join(os.path.dirname(__file__), 'colors.json')) as f:
            colors = json.load(f)

        max_languages = 5
        # Used when colors.json has no (or a null) colour for a language, so
        # that the colour range always stays aligned with the domain.
        fallback_color = '#cccccc'

        # Select the top languages of every quarter.
        top_languages = {}  # language -> number of quarters it was a top entry
        for quarters in self.yearly_data.values():
            for quarter_data in quarters.values():
                # Skip the 'top' entry written by an earlier call; it is a
                # dict and cannot be ranked against the numeric values.
                candidates = [lang for lang in quarter_data if lang != 'top']
                ranked = sorted(candidates, key=quarter_data.get,
                                reverse=True)[:max_languages]
                quarter_top = {lang: quarter_data[lang]
                               for lang in ranked if quarter_data[lang] != 0}
                # Always set 'top' (possibly empty) so that quarters without
                # any language do not raise KeyError further down.
                quarter_data['top'] = quarter_top
                for language in quarter_top:
                    top_languages[language] = top_languages.get(language, 0) + 1

        all_languages = list(top_languages)

        # Exactly one colour per language, in the same order as the domain.
        allColorsValues = []
        for language in all_languages:
            color = (colors.get(language) or {}).get('color')
            allColorsValues.append(fallback_color if color is None else color)

        # language -> one [Q1, Q2, Q3, Q4] list per year
        years = list(self.yearly_data.keys())
        languages_all_loc = {}
        for language in all_languages:
            language_year = []
            for year in years:
                language_quarter = [0, 0, 0, 0]
                for quarter, quarter_data in self.yearly_data[year].items():
                    language_quarter[quarter - 1] = quarter_data['top'].get(
                        language, 0)
                language_year.append(language_quarter)
            languages_all_loc[language] = language_year

        def prep_df(df, name):
            # Long form: one row per (year, quarter) tagged with the language.
            df = df.stack().reset_index()
            df.columns = ['c1', 'c2', 'values']
            df['Language'] = name
            return df

        language_df = {
            language: prep_df(
                pd.DataFrame(per_year, index=years,
                             columns=["Q1", "Q2", "Q3", "Q4"]),
                language)
            for language, per_year in languages_all_loc.items()
        }

        df = pd.concat(list(language_df.values()))

        chart = alt.Chart(df).mark_bar().encode(

            # quarters on the x axis inside every year column
            x=alt.X('c2:N', title=None),

            # stacked sum of the values per quarter
            y=alt.Y('sum(values):Q',
                    axis=alt.Axis(
                        grid=False,
                        title='Lines Of Code added')),

            # one column of bars per year
            column=alt.Column('c1:N', title=None),

            # colour segments per language, using the colours loaded above
            color=alt.Color('Language:N',
                            scale=alt.Scale(
                                domain=all_languages,
                                range=allColorsValues,
                            ),
                            )) \
            .configure_view(
            # remove the frame around the column clusters
            strokeOpacity=0
        )
        chart.save('bar_graph.png')
        return 'bar_graph.png'
Exemple #24
0
def dead():
    """
    Render the COVID-19 deaths chart to ``graphs/dead.png``.

    Reads ``data/dead.csv``, reindexes it to one row per calendar day from
    2020-03-07 up to the last date in the file, and draws daily new deaths
    as bars, their 7-day rolling mean as a line and the cumulative total as
    a second line on an independent y scale.  An already existing output
    file is removed first.
    """
    data = "data/dead.csv"
    filename = "graphs/dead.png"
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)

    # One row per day; days missing from the CSV become NaN rows.
    today = date.today()
    idx = pd.date_range("2020-03-07", df["date"].max())
    df.index = pd.DatetimeIndex(df["date"])
    df = df.reindex(idx)
    df["date"] = df.index
    df = df.reset_index(drop=True)
    df = df[df.date <= str(today)]

    df["new"] = df["new"].fillna(0).astype(int)
    # `.bfill()` replaces the deprecated `fillna(method="bfill")`.
    df["total"] = df["total"].bfill().astype(int)
    df["new_sma7"] = df.new.rolling(window=7).mean()

    # Long form; dropna() discards the first rows where the rolling mean is
    # not defined yet.
    df = df.melt(
        id_vars=["date"],
        value_vars=["new", "new_sma7", "total"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {"new": "New", "new_sma7": "Avg 7 d.", "total": "Cumulative"}
    df["category"] = df["category"].replace(rename)

    base = alt.Chart(df, title="COVID-19 related deaths (Source: FHI)").encode(
        alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40))
    )

    bar = (
        base.transform_filter(alt.datum.category == "New")
        .mark_bar(color="#FFD1D1")
        .encode(y=alt.Y("value:Q", axis=alt.Axis(title="New per day", grid=True)))
    )

    # The colour scale for all three categories is declared on this layer.
    line = (
        base.transform_filter(alt.datum.category == "Cumulative")
        .mark_line(color="#2E507B", strokeWidth=3)
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New", "Avg 7 d.", "Cumulative"],
                    range=["#FFD1D1", "red", "#2E507B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    ma7 = (
        base.transform_filter(alt.datum.category == "Avg 7 d.")
        .mark_line(opacity=0.8)
        .encode(y=alt.Y("value:Q"), color=alt.Color("category:N"))
    )

    # Bars and moving average share one y scale; the cumulative line gets
    # its own.
    chart = (
        alt.layer(bar + ma7, line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
Exemple #25
0
}, {
    "year": "2014",
    "population": 41777
}]

# Historical periods drawn as shaded rectangles behind the population line.
source2 = [
    {"start": "1933", "end": "1945", "event": "Nazi Rule"},
    {"start": "1948", "end": "1989", "event": "GDR (East Germany)"},
]

# Turn both record lists into DataFrames.
source = alt.pd.DataFrame(source)
source2 = alt.pd.DataFrame(source2)

# Population over time, with the x axis showing only the year.
line = (
    alt.Chart(source)
    .mark_line(color='#333')
    .encode(
        x=alt.X('year:T', axis=alt.Axis(format='%Y')),
        y='population',
    )
    .properties(width=600, height=400)
)

# Same encoding as the line, drawn as point marks.
point = line.mark_point(color='#333')

# One rectangle per period, spanning start..end and coloured by event.
rect = (
    alt.Chart(source2)
    .mark_rect()
    .encode(x='start:T', x2='end:T', color='event:N')
)

# Layer order: rectangles at the back, then line, then points.
rect + line + point
Exemple #26
0
def hospitalized():
    """
    Render the hospital admissions chart to ``graphs/hospitalized.png``.

    Reads ``data/hospitalized.csv``, reindexes it to one row per calendar
    day from 2020-03-08 until today, forward-fills the three patient counts
    and draws them as overlapping (unstacked) area series.  An already
    existing output file is removed first.
    """
    data = "data/hospitalized.csv"
    filename = "graphs/hospitalized.png"
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)

    # One row per day; days missing from the CSV become NaN rows.
    today = date.today()
    idx = pd.date_range("2020-03-08", today)
    df.index = pd.DatetimeIndex(df["date"])
    df = df.reindex(idx)
    df["date"] = df.index
    df = df.reset_index(drop=True)

    # Carry the last known count forward over missing days.  `.ffill()`
    # replaces the deprecated `fillna(method="ffill")`.
    for column in ("admissions", "icu", "respiratory"):
        df[column] = df[column].ffill().astype(int)

    # The display names must match the colour-scale domain below exactly;
    # "icu" used to be mapped to "Intensive", which is not in the domain.
    df_melt = pd.melt(
        df,
        id_vars=["date"],
        value_vars=["admissions", "icu", "respiratory"],
        value_name="value",
    ).replace(
        {
            "admissions": "Hospitalized",
            "icu": "Intensive Care",
            "respiratory": "Respirator",
        }
    )

    chart = (
        alt.Chart(
            df_melt,
            title="Number of patients admitted to hospital with COVID-19 (Source: Helsedirektoratet)",
        )
        .mark_area(line={}, opacity=0.3)
        .encode(
            x=alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)),
            y=alt.Y(
                "value:Q",
                stack=None,  # overlay the series instead of stacking them
                title="Number of patients",
            ),
            color=alt.Color(
                "variable:N",
                scale=alt.Scale(
                    domain=["Hospitalized", "Intensive Care", "Respirator"],
                    range=["#5A9DFF", "#FF8B1B", "#FF642B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
Exemple #27
0
def outcome_bars(data, name=None, width=100):
    """
    Create a bar chart showing the percentage of hands won, lost, pushed
    and surrendered.

    Args:
        data: a DataFrame, a list of dictionaries (converted to a single
            DataFrame), or a list whose items are each converted to their
            own DataFrame (one game per item).
        name: label(s) for the games; a single string or a sequence with one
            entry per game.  Defaults to ``Game0``, ``Game1``, ...
        width: width passed to the Altair chart.

    Returns:
        An Altair chart with one column per result and one bar per game.

    Raises:
        TypeError: if ``data`` is neither a DataFrame nor a list.
        ValueError: if ``data`` is an empty list.
    """
    if isinstance(data, pd.DataFrame):
        data_list = [data]
    elif isinstance(data, list):
        if not data:
            raise ValueError("'data' must not be empty")
        # A list of dictionaries (e.g. a player history) is a single game;
        # anything else is treated as a list of games.
        if all(isinstance(item, dict) for item in data):
            data_list = [pd.DataFrame(data)]
        else:
            data_list = [pd.DataFrame(item) for item in data]
    else:
        msg = "'data' must be a DataFrame or list"
        raise TypeError(msg)

    # Assign one name per game.
    if not name:
        name = [f"Game{i}" for i in range(len(data_list))]
    elif isinstance(name, str):
        # A bare string would otherwise be zipped character by character.
        name = [name]

    # Calculate the percentages; one record per (game, result).
    plot_data_list = []
    for _name, _data in zip(name, data_list):
        win, loss, push, surrender = results_pct(_data, as_series=False)
        outcomes = (("Win", win), ("Loss", loss), ("Push", push),
                    ("Surrender", surrender))
        for order, (result, pct) in enumerate(outcomes, start=1):
            plot_data_list.append(
                {"game": _name, "result": result, "pct": pct, "order": order}
            )
    plot_data = pd.DataFrame(plot_data_list)

    # create altair chart
    chart = alt.Chart(plot_data, width=width).mark_bar().encode(
        x=alt.X(
            "game",
            axis=alt.Axis(labelAngle=-45),
            title=None,
            sort=["Win", "Loss", "Push"]
        ),
        y=alt.Y(
            "pct:Q"
        ),
        color=alt.Color(
            "game:O",
            legend=None
        ),
        column=alt.Column(
            "result:O",
            title="Result"
        ),
        tooltip=[
            alt.Tooltip("pct", title="Pct")
        ]
    )
    return chart
Exemple #28
0
def smittestopp():
    """
    Render the Smittestopp app chart to ``graphs/smittestopp.png``.

    Reads ``data/smittestopp.csv``, reshapes the ``new_reported`` and
    ``total_downloads`` columns to long form and layers two marks on
    independent y scales: bars for the reported infections and a
    semi-transparent area for the downloads.  An already existing output
    file is removed first.
    """
    source_csv = "data/smittestopp.csv"
    target_png = "graphs/smittestopp.png"

    # Remove the previous render before producing a new one.
    if os.path.exists(target_png):
        os.remove(target_png)

    raw = pd.read_csv(source_csv)
    raw["date"] = pd.to_datetime(raw["date"])

    # Long form: one row per (date, category); rows without a value are dropped.
    tidy = pd.melt(
        raw,
        id_vars=["date"],
        value_vars=["new_reported", "total_downloads"],
        var_name="category",
        value_name="value",
    ).dropna()

    display_names = {
        "new_reported": "Number of reported infections",
        "total_downloads": "Number of downloads",
    }
    tidy["category"] = tidy["category"].replace(display_names)

    day_axis = alt.X(
        "yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)
    )
    base = alt.Chart(
        tidy,
        title="Number of downloads of Smittestopp og number of reported infections through the app (Source: FHI)",
    ).encode(day_axis)

    download_area = (
        base.transform_filter(alt.datum.category == "Number of downloads")
        .mark_area(line={}, color="#5BC1FF", opacity=0.2)
        .encode(
            y=alt.Y(
                "value:Q",
                axis=alt.Axis(title="Number of downloads", grid=True),
            )
        )
    )

    # The colour scale for both categories is declared on the bar layer.
    palette = alt.Scale(
        domain=[
            "Number of downloads",
            "Number of reported infections",
        ],
        range=["#5BC1FF", "#FFA57E"],
    )
    infection_bars = (
        base.transform_filter(
            alt.datum.category == "Number of reported infections"
        )
        .mark_bar(color="#FFA57E")
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Number of reported infections")),
            color=alt.Color(
                "category:N", scale=palette, legend=alt.Legend(title=None)
            ),
        )
    )

    legend_style = dict(
        strokeColor="gray",
        fillColor="#FFFFFF",
        labelFontSize=12,
        symbolStrokeWidth=2,
        symbolSize=160,
        labelLimit=200,
        padding=6,
        cornerRadius=5,
        direction="horizontal",
        orient="none",
        legendX=390,
        legendY=660,
    )
    layered = (
        alt.layer(infection_bars, download_area)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
    )
    layered.configure_legend(**legend_style).save(target_png)
Exemple #29
0
    def labels(self, internal=False, **kwargs):
        """
        Build the text labels showing the last value of every series.

        The keyword arguments are forwarded to ``self._parseArgs`` with
        ``call='labels'``.  The label chart is only built, and stored in
        ``self._labels``, when ``internal`` is true.

        When ``self.date_label`` is set, an extra label is placed to the
        right of the value labels:

        * ``True`` -- the latest ``x`` value as a timestamp (with the
          timezone name appended when ``self.timezone`` is set); only
          applies when ``self._datebased`` is true, otherwise the plain
          layout without the extra label is used;
        * ``'Date'``/``'date'``/``'days'``/``'day'`` -- the date part only;
        * ``'Time'``/``'time'``/``'hour'``/``'hours'`` -- the time part only;
        * any other string -- that string itself.

        Returns:
            ``self``, to allow call chaining.

        Raises:
            ValueError: if ``date_label`` is truthy but neither a bool nor a
                string.
        """
        self._parseArgs(call='labels', **kwargs)
        if not internal:
            return self

        # Last row of every series.
        temp = self.source.groupby('variable').last().reset_index()

        # One-row frame for the extra label; stays None when no extra label
        # applies (this used to be left unbound for date_label=True on data
        # that is not date based).
        temp_time = None
        if self.date_label:
            if isinstance(self.date_label, bool):
                if self._datebased:
                    max_time = temp['x'].max()
                    if self.timezone:
                        formatted_time = pd.to_datetime(max_time, utc=True)
                        _formatted_time = str(
                            formatted_time.tz_localize(None)) + ' ' + str(
                                formatted_time.tzinfo)
                    else:
                        formatted_time = pd.to_datetime(max_time)
                        _formatted_time = str(
                            formatted_time.tz_localize(None))
                    temp_time = pd.DataFrame([{
                        'variable': 'Time',
                        'x': max_time,
                        'value': _formatted_time
                    }])
            elif isinstance(self.date_label, str):
                if self.date_label in {'Date', 'date', 'days', 'day'}:
                    max_time = temp['x'].max()
                    formatted_time = pd.to_datetime(max_time).date()
                    temp_time = pd.DataFrame([{
                        'variable': 'Time',
                        'x': max_time,
                        'value': str(formatted_time)
                    }])
                elif self.date_label in {'Time', 'time', 'hour', 'hours'}:
                    max_time = temp['x'].max()
                    formatted_time = pd.to_datetime(max_time).time()
                    temp_time = pd.DataFrame([{
                        'variable': 'Time',
                        'x': max_time,
                        'value': str(formatted_time)
                    }])
                else:
                    # Free text: shown verbatim under a blank header.
                    temp_time = pd.DataFrame([{
                        'variable': '  ',
                        'x': str(self.date_label),
                        'value': str(self.date_label)
                    }])
            else:
                raise ValueError('Unsupported date_label argument.')

        if temp_time is not None:
            # Reserve room for the extra label in proportion to its text
            # length and give the rest of the width to the value labels.
            _width = 80 + 4 * int(len(temp_time['value'].values[0]))
            width = self.prop.get('width')
            width1, width2 = width - _width, _width
            labels = alt.Chart(temp).mark_text(**self.textMark).encode(
                x=alt.X('variable:O',
                        axis=alt.Axis(**self.labelsAxis),
                        title=None),
                text=alt.Text('value:Q',
                              format=self.format)).properties(width=width1,
                                                              height=30,
                                                              title='')

            time_label = alt.Chart(temp_time).mark_text(
                **self.textMark).encode(
                    x=alt.X('variable:O',
                            axis=alt.Axis(**self.labelsAxis),
                            title=None),
                    text=alt.Text('value:O', )).properties(width=width2,
                                                           height=30,
                                                           title='')
            labels = alt.hconcat(labels,
                                 time_label,
                                 spacing=0,
                                 title=alt.TitleParams(text=self.prop.get(
                                     'title', 'Title Needed'),
                                                       anchor='middle'))
        else:
            labels = alt.Chart(temp).mark_text(**self.textMark).encode(
                x=alt.X('variable:O',
                        axis=alt.Axis(**self.labelsAxis),
                        title=None),
                text=alt.Text('value:Q', format=self.format)).properties(
                    width=self.prop.get('width'),
                    height=30,
                    title=self.prop.get('title', 'Title Needed'))

        # The title is carried by the labels; drop it from the properties.
        if self.prop.get('title'):
            self.prop.pop('title')
        self._labels = labels
        return self
Exemple #30
0
    def plot_interactive(
        self,
        x_axis: Union[int, str, Embedding] = 0,
        y_axis: Union[int, str, Embedding] = 1,
        axis_metric: Optional[Union[str, Callable, Sequence]] = None,
        x_label: Optional[str] = None,
        y_label: Optional[str] = None,
        title: Optional[str] = None,
        annot: bool = True,
        color: Union[None, str] = None,
    ):
        """
        Makes highly interactive plot of the set of embeddings.

        Arguments:
            x_axis: the x-axis to be used, must be given when dim > 2; if an integer, the corresponding
                dimension of embedding is used.
            y_axis: the y-axis to be used, must be given when dim > 2; if an integer, the corresponding
                dimension of embedding is used.
            axis_metric: the metric used to project each embedding on the axes; only used when the corresponding
                axis (i.e. `x_axis` or `y_axis`) is a string or an `Embedding` instance. It could be a string
                (`'cosine_similarity'`, `'cosine_distance'` or `'euclidean'`), or a callable that takes two vectors as input
                and returns a scalar value as output. To set different metrics for x- and y-axis, a list or a tuple of
                two elements could be given. By default (`None`), normalized scalar projection (i.e. `>` operator) is used.
            x_label: an optional label used for x-axis; if not given, it is set based on `x_axis` value.
            y_label: an optional label used for y-axis; if not given, it is set based on `y_axis` value.
            title: an optional title for the plot; if not given, it is set based on `x_axis` and `y_axis` values.
            annot: drawn points should be annotated
            color: a property that will be used for plotting

        Returns:
            The Altair chart: the interactive scatter plot, layered with the
            text annotations when `annot` is true.

        **Usage**

        ```python
        from whatlies.language import SpacyLanguage

        words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
                 "cousin", "neice", "king", "queen", "dude", "guy", "gal", "fire",
                 "dog", "cat", "mouse", "red", "bluee", "green", "yellow", "water",
                 "person", "family", "brother", "sister"]

        lang = SpacyLanguage("en_core_web_sm")
        emb = lang[words]

        emb.plot_interactive('man', 'woman')
        ```
        """
        # A string axis refers to an embedding of this set by name.
        if isinstance(x_axis, str):
            x_axis = self[x_axis]
        if isinstance(y_axis, str):
            y_axis = self[y_axis]

        # A list/tuple gives one metric per axis; anything else is shared.
        if isinstance(axis_metric, (list, tuple)):
            x_axis_metric = axis_metric[0]
            y_axis_metric = axis_metric[1]
        else:
            x_axis_metric = axis_metric
            y_axis_metric = axis_metric

        # Determine axes values and labels: an integer axis selects a raw
        # dimension, an embedding axis compares every embedding against it.
        if isinstance(x_axis, int):
            x_val = self.to_X()[:, x_axis]
            x_lab = "Dimension " + str(x_axis)
        else:
            x_axis_metric = Embedding._get_plot_axis_metric_callable(x_axis_metric)
            x_val = self.compare_against(x_axis, mapping=x_axis_metric)
            x_lab = x_axis.name

        if isinstance(y_axis, int):
            y_val = self.to_X()[:, y_axis]
            y_lab = "Dimension " + str(y_axis)
        else:
            y_axis_metric = Embedding._get_plot_axis_metric_callable(y_axis_metric)
            y_val = self.compare_against(y_axis, mapping=y_axis_metric)
            y_lab = y_axis.name
        x_label = x_label if x_label is not None else x_lab
        y_label = y_label if y_label is not None else y_lab
        title = title if title is not None else f"{x_lab} vs. {y_lab}"

        plot_df = pd.DataFrame(
            {
                "x_axis": x_val,
                "y_axis": y_val,
                "name": [v.name for v in self.embeddings.values()],
                "original": [v.orig for v in self.embeddings.values()],
            }
        )

        # Optional colouring by an attribute of the embeddings; embeddings
        # lacking the attribute get an empty string.
        if color:
            plot_df[color] = [
                getattr(v, color) if hasattr(v, color) else ""
                for v in self.embeddings.values()
            ]

        result = (
            alt.Chart(plot_df)
            .mark_circle(size=60)
            .encode(
                x=alt.X("x_axis", axis=alt.Axis(title=x_label)),
                # The y channel must be built with alt.Y (it was alt.X).
                y=alt.Y("y_axis", axis=alt.Axis(title=y_label)),
                tooltip=["name", "original"],
                color=alt.Color(":N", legend=None) if not color else alt.Color(color),
            )
            .properties(title=title)
            .interactive()
        )

        if annot:
            text = (
                alt.Chart(plot_df)
                .mark_text(dx=-15, dy=3, color="black")
                .encode(
                    x="x_axis",
                    y="y_axis",
                    text="original",
                )
            )
            result = result + text
        return result