Beispiel #1
0
def show_hillside(V, P0):
    """Draw the cells of ``V`` as circles on two axes ordered by ``P0``.

    ``P0`` is wrapped in a Series indexed by ``V.columns`` and arg-sorted.
    Every cell of the transposed matrix becomes one row of a long table
    that carries the arg-sort value of its two labels; those values drive
    the ordering of the x and y axes, and the cell value drives circle size.

    Parameters:
        V: DataFrame of values. NOTE(review): the lookups below assume the
            index labels of ``V`` also appear in ``V.columns`` - confirm.
        P0: One value per column of ``V``.

    Returns:
        The altair chart.
    """
    order = pd.Series(P0, index=V.columns).argsort()

    # Long format: one row per (column label, index label, value).
    cells = V.T.stack().to_frame().reset_index()
    cells.columns = ["team_i_name", "team_k_name", "v"]
    # The x-side key is negated and later sorted "descending".
    cells["ri"] = list(-order.loc[cells["team_i_name"]])
    cells["rk"] = list(order.loc[cells["team_k_name"]])

    x_axis = alt.X(
        'team_i_name:N',
        axis=alt.Axis(labelOverlap=False),
        title="r",
        sort=alt.SortField(field="ri", order="descending"),
    )
    y_axis = alt.Y(
        'team_k_name:N',
        axis=alt.Axis(labelOverlap=False),
        title="r",
        sort=alt.SortField(field="rk", order="ascending"),
    )
    return alt.Chart(cells).mark_circle().encode(x=x_axis, y=y_axis,
                                                 size='v:Q')
def get_most_accepted_graph():
    """Return two side-by-side top-10 bar charts of accepted asylum requests.

    The left chart ranks nations by the absolute number of granted
    requests, the right chart by the ratio of granted to submitted
    requests. Each bar carries a text label.

    Parameters: None

    Returns: The horizontally concatenated altair chart.

    """
    ratio_col = 'Verhältnis der Asylgewährungen zu Asylgesuchen'
    granted_col = 'Total Asylgewährungen pro Land'

    nations = get_nations_df()
    top_by_ratio = nations.sort_values(ratio_col, ascending=False).iloc[:10]
    top_by_granted = nations.sort_values(granted_col,
                                         ascending=False).iloc[:10]

    def ranked_bars(frame, column):
        # Horizontal bars, nations ordered by the plotted column.
        return alt.Chart(frame).mark_bar().encode(
            x=column + ':Q',
            y=alt.Y('Nation:O',
                    sort=alt.SortField(field=column, order='descending')))

    def bar_labels(bars, text_field):
        # Text marks placed just right of each bar end.
        return bars.mark_text(align='left', baseline='middle',
                              dx=3).encode(text=text_field)

    ratio_bars = ranked_bars(top_by_ratio, ratio_col)
    # The ratio chart is labelled with the total number of requests.
    ratio_labels = bar_labels(ratio_bars, 'Total Asylgesuche pro Land:Q')

    granted_bars = ranked_bars(top_by_granted, granted_col)
    granted_labels = bar_labels(granted_bars,
                                'Total Asylgewährungen pro Land:Q')

    left = (granted_bars + granted_labels).properties(
        width=250,
        height=150,
        title='Top 10 Länder mit den meisten angenommenen Gesuchen')
    right = (ratio_bars + ratio_labels).properties(
        width=250,
        height=150,
        title='Top 10 Länder mit den prozentual meisten angenommenen Gesuchen')
    return alt.hconcat(left, right)
Beispiel #3
0
def get_chart(keyword):
    """Build the news dashboard for ``keyword`` and return it as JSON.

    Loads the top TF-IDF words table, optionally restricted to articles
    whose cleaned text contains ``keyword``, and renders three charts:
    article counts over time, article counts per site, and the most
    frequent words.

    Parameters:
        keyword: Text to search for, or ``"*"`` to load every article.

    Returns:
        The Vega-Lite specification of the combined chart as a JSON string.
    """
    # NOTE(review): recent SQLAlchemy releases expect the "postgresql://"
    # scheme; confirm the installed version before changing this URL.
    db_string = "postgres://*****:*****@postgres:5432/shared"
    query = ("select article_id,string_date,site,palabra,n_w "
             "from tb_news_covid_mexico_palabras_top_tfidf")
    params = None
    if keyword != "*":
        # The keyword is passed as a bound parameter instead of being
        # concatenated into the statement, so it cannot alter the SQL.
        query += (" where article_id in (select article_id from "
                  "tb_news_covid_mexico_date_text "
                  "where clean_text LIKE :pattern )")
        params = {"pattern": "%" + keyword + "%"}

    db = create_engine(db_string)
    try:
        df = pd.read_sql_query(sqlalchemy.text(query), db, params=params)
    finally:
        # Release the pooled connections of this per-call engine.
        db.dispose()

    chart3 = alt.Chart(df).mark_point().encode(
        y='count()', x='string_date:T').properties(width=900).interactive()

    chart1 = alt.Chart(df).mark_bar().encode(
        x=alt.X('count(article_id):Q'),
        y=alt.Y("site:N",
                sort=alt.EncodingSortField(
                    field="site", op="count",
                    order="descending"))).transform_aggregate(
                        groupby=["article_id", "site"]).properties(height=800)

    # Sum word weights per word, rank them and keep ranks below 25.
    chart2 = alt.Chart(df).mark_bar().encode(
        x=alt.X('freq_palabras:Q', aggregate="sum"),
        y=alt.Y(
            "palabra",
            sort=alt.EncodingSortField(
                field="freq_palabras", op="sum",
                order="descending"))).transform_aggregate(
                    freq_palabras='sum(n_w)',
                    groupby=["palabra"],
                ).transform_window(
                    rank='row_number()',
                    sort=[alt.SortField("freq_palabras", order="descending")],
                ).transform_filter(
                    (alt.datum.rank < 25)).properties(height=800)

    return alt.vconcat(chart3, alt.hconcat(chart1, chart2)).to_json()
Beispiel #4
0
def draw_chart(data, zipcode):
    """Bar chart of the 25 largest ``perZip<zipcode>`` values by cuisine.

    Parameters:
        data: DataFrame with a ``cuisine`` column and one
            ``perZip<zipcode>`` column per zip code.
        zipcode: Zip code string appended to ``"perZip"`` to pick the column.

    Returns:
        The altair chart. When a required column is missing, a single
        empty placeholder row is plotted instead.
    """
    # The hovered bar is recoloured through the "highlight" selection.
    hover_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    hover = alt.selection_single(name="highlight", empty="all",
                                 on="mouseover")

    count_col = "perZip" + zipcode
    try:
        top = pd.concat([data["cuisine"], data[count_col]],
                        axis=1).nlargest(25, count_col)
        upper = int(top[count_col].max())
    except KeyError:
        upper = 1
        top = pd.DataFrame([{"cuisine": "", count_col: 0}])

    x_axis = alt.X(count_col + ":Q",
                   axis=alt.Axis(title="Number of Restaurants"),
                   scale=alt.Scale(domain=(0, upper)))
    y_axis = alt.Y("cuisine:O",
                   axis=alt.Axis(title="Cuisine Type"),
                   sort=alt.SortField(field=count_col, op="argmax"))
    bar_color = alt.ColorValue("LightGrey", condition=hover_color)

    bars = alt.Chart(top).mark_bar(stroke="Black").encode(
        x_axis, y_axis, bar_color)
    return bars.properties(selection=hover)
Beispiel #5
0
def createChart(data, zipcode):
    """Bar chart of the 20 largest ``perZip<zipcode>`` values by cuisine.

    Parameters:
        data: DataFrame with a ``cuisine`` column and one
            ``perZip<zipcode>`` column per zip code.
        zipcode: Zip code string appended to ``'perZip'`` to pick the column.

    Returns:
        The altair chart; a one-row placeholder is plotted when a required
        column is missing.
    """
    hover_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    hover = alt.selection_single(name="highlight",
                                 empty="all",
                                 on="mouseover")

    count_col = 'perZip' + zipcode
    try:
        top = pd.concat([data['cuisine'], data[count_col]],
                        axis=1).nlargest(20, count_col)
        upper = int(top[count_col].max())
    except KeyError:
        upper = 1
        top = pd.DataFrame([{"cuisine": "", count_col: 0}])

    encodings = (
        alt.X(count_col + ':Q',
              axis=alt.Axis(title="Restaurants"),
              scale=alt.Scale(domain=(0, upper))),
        alt.Y('cuisine:O',
              axis=alt.Axis(title="cuisine"),
              sort=alt.SortField(field=count_col, op="argmax")),
        alt.ColorValue("LightGrey", condition=hover_color),
    )
    bars = alt.Chart(top).mark_bar(stroke="Black").encode(*encodings)
    return bars.properties(selection=hover)
Beispiel #6
0
def createChart(data, zipcode):
    """Bar chart of the 20 largest ``num`` values for one zip code.

    Parameters:
        data: DataFrame with ``zipcode``, ``name`` and ``num`` columns.
        zipcode: Value matched against the ``zipcode`` column.

    Returns:
        The altair chart. If computing the maximum raises ``ValueError``,
        a single placeholder row is plotted on a 0-10 axis.
    """
    hover_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    hover = alt.selection_single(name="highlight",
                                 empty="all",
                                 on="mouseover")

    rows = data[data['zipcode'] == zipcode].nlargest(20, 'num')
    try:
        upper = int(rows['num'].max())
    except ValueError:
        upper = 10
        rows = pd.DataFrame([{"name": "undefine", "num": 0}])

    x_axis = alt.X("num:Q",
                   axis=alt.Axis(title="Restaurants"),
                   scale=alt.Scale(domain=(0, upper)))
    y_axis = alt.Y('name:O',
                   axis=alt.Axis(title="cuisine"),
                   sort=alt.SortField(field="num", op="argmax"))
    bar_color = alt.ColorValue("LightGrey", condition=hover_color)

    bars = alt.Chart(rows).mark_bar(stroke="Black").encode(
        x_axis, y_axis, bar_color)
    return bars.properties(selection=hover)
Beispiel #7
0
def display_the_plot():
    """Render the connection-count chart for the current session selection.

    Filters ``esb_data`` by the types, councils and years stored in the
    session, prints the three intermediate frames, saves the faceted bar
    chart to ``templates/plot.html`` and renders that template.

    Returns:
        The result of ``render_template("plot.html")``.
    """
    def keep_matching(frame, column, wanted):
        # One slice per wanted value, concatenated in selection order.
        return pd.concat(frame[frame[column] == value] for value in wanted)

    by_type = keep_matching(esb_data, "Type", session["selection_city"])
    by_council = keep_matching(by_type, "Council",
                               session["selection_council"])
    # Years are compared as integers.
    by_year = keep_matching(by_council, "Year",
                            (int(year) for year in session["selection_year"]))
    for frame in (by_type, by_council, by_year):
        print(frame)

    x_axis = alt.X(
        'No_Of_Connections:Q',
        sort=alt.SortField(field='No_Of_Connections', order='ascending'),
        scale=alt.Scale(domain=(0, 1000)),
        axis=alt.Axis(title="Connection Count", tickCount=20),
    )
    plot = alt.Chart(by_year).mark_bar().encode(
        x_axis,
        alt.Y('Month:O'),
        alt.Color('Month:N'),
        alt.Facet('Year:O'),
    ).properties(width=200)
    plot.save("templates/plot.html")
    return render_template("plot.html")
def show_game_publisher(df):
    """Write the "top 20 games per publisher" section to the Streamlit page.

    A dropdown bound to the ``Publisher`` field filters the data; the
    remaining rows are ranked by ``Global_Sales`` and the best 20 ranks are
    drawn as a stacked bar chart of global sales per year, coloured by game.

    Parameters:
        df: DataFrame with ``Publisher``, ``Name``, ``Genre``, ``Year``,
            ``Global_Sales``, ``NA_Sales``, ``EU_Sales``, ``JP_Sales`` and
            ``Other_Sales`` columns.

    Returns:
        None. Output is written through ``st.write``.
    """
    st.write('## Top 20 Best-selling Game for each Company/Publisher')

    st.write("**What are the most popular games by each company/publisher?**")

    publishers = list(df["Publisher"].unique())
    select_box = alt.binding_select(options=publishers,
                                    name='Select a Publisher:')
    sel = alt.selection_single(fields=['Publisher'],
                               bind=select_box,
                               init={'Publisher': 'Nintendo'})

    st.write(
        "💡 *You can select specific publisher/company with the dropdown menu below*"
    )
    st.write(
        "💡 *Hover over the game titles to see their popularities in different regions*"
    )

    # Stacked bar chart. The publisher filter runs first, so the ranking
    # is computed within the selected publisher only.
    chart = alt.Chart(df).transform_filter(sel).mark_bar().encode(
        x=alt.X('Year:N'),
        y=alt.Y('sum(Global_Sales):Q', title='Sale (in millions)'),
        color=alt.Color('Name', scale=alt.Scale(scheme='tableau20')),
        order=alt.Order('sum(Global_Sales)', sort='ascending'),
        tooltip=[
            'Name', 'Genre', 'sum(NA_Sales)', 'sum(EU_Sales)',
            'sum(JP_Sales)', 'sum(Other_Sales)'
        ]).transform_window(
            rank="rank(Global_Sales)",
            sort=[alt.SortField("Global_Sales", order="descending")],
        ).transform_filter(
            # "<= 20" keeps ranks 1..20; the previous "< 20" showed only
            # 19 ranks under a "Top 20" heading.
            alt.datum.rank <= 20
        ).add_selection(sel).properties(width=800, height=500)

    st.write(chart)
Beispiel #9
0
def createChart(data):
    """Bar chart of the mean ``restaurants`` value per cuisine.

    Parameters:
        data: DataFrame with ``cuisine`` and ``restaurants`` columns.

    Returns:
        A horizontal concatenation holding the single bar chart, with
        ``data`` attached at the top level.
    """
    hover = alt.selection_single(name="highlight",
                                 empty="all",
                                 on="mouseover")
    picked = alt.selection_single(name="rating",
                                  empty="all",
                                  encodings=['y'])
    # Evaluated as in the original but not used by the chart below.
    _largest = int(data['restaurants'].max())

    x_axis = alt.X("mean(restaurants):Q", axis=alt.Axis(title="Restaurants"))
    y_axis = alt.Y('cuisine:O',
                   axis=alt.Axis(title="Cuisine"),
                   sort=alt.SortField(field="restaurants",
                                      op="mean",
                                      order='descending'))

    # The chart has no data of its own; hconcat supplies it.
    bars = alt.Chart().mark_bar(stroke="Black").encode(
        x_axis,
        y_axis,
        alt.ColorValue("LightGrey"),
    ).properties(
        width=200,
        height=350,
        selection=hover + picked,
    )
    return alt.hconcat(bars, data=data)
Beispiel #10
0
def createChart(data, name=''):
    """Bar chart of ``rating`` per ``name`` with a hover highlight.

    Parameters:
        data: DataFrame with ``name`` and ``rating`` columns.
        name: Label inserted into the chart title.

    Returns:
        A horizontal concatenation holding the single bar chart, with
        ``data`` and the title attached at the top level.
    """
    hover_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    hover = alt.selection_single(name="highlight",
                                 empty="all",
                                 on="mouseover")

    x_axis = alt.X("rating:Q",
                   axis=alt.Axis(title="The number of restaurants"))
    # The y-axis title is a fixed string.
    y_axis = alt.Y('name:O',
                   axis=alt.Axis(title="Cuisines"),
                   sort=alt.SortField(field="rating",
                                      op="mean",
                                      order='descending'))
    bar_color = alt.ColorValue("LightGrey", condition=hover_color)

    bars = alt.Chart().mark_bar(stroke="Black").encode(
        x_axis, y_axis, bar_color).properties(selection=hover)

    return alt.hconcat(
        bars,
        data=data,
        title=f"The number of restaurants ({name} in NYC) - Top 25 cuisines")
Beispiel #11
0
def plot_heatmap(df, x, x_sort, x_title, y, y_sort, y_title, color,
                 color_title):
    """Return a rect-mark heatmap of ``df``.

    Parameters:
        df: Data to plot.
        x, y: Field names for the ordinal x and y axes.
        x_sort, y_sort: Field names used to order the respective axis.
        x_title, y_title: Axis titles.
        color: Field summed per cell and shown as a quantitative colour.
        color_title: Legend title.

    Returns:
        The altair chart.
    """
    x_encoding = altair.X(field=x,
                          type='ordinal',
                          sort=altair.SortField(x_sort),
                          title=x_title)
    y_encoding = altair.Y(field=y,
                          type='ordinal',
                          sort=altair.SortField(y_sort),
                          title=y_title)
    color_encoding = altair.Color(field=color,
                                  type='quantitative',
                                  aggregate='sum',
                                  title=color_title)
    heatmap = altair.Chart(df).mark_rect()
    return heatmap.encode(x=x_encoding, y=y_encoding, color=color_encoding)
Beispiel #12
0
def plot_feature_importance(feature_df, lr_feature_df, output):
    """Save side-by-side ranked bar charts of feature importance.

    Panel (A) shows the ``importance`` column of ``feature_df``, panel (B)
    the ``weights`` column of ``lr_feature_df``. Features are ordered by
    the plotted value, largest first.

    Parameters:
    -----------
    feature_df: (dataframe) ``feature_names`` and ``importance`` columns
    lr_feature_df: (dataframe) ``feature_names`` and ``weights`` columns
    output: (str) prefix the file name ``feature_plot.png`` is appended to

    Returns:
    --------
    None. The combined chart is written to ``output + "feature_plot.png"``.
    """

    def ranked_bars(frame, value_field, color, value_title, title):
        # The sort field must be the bare column name: SortField does not
        # parse the ":Q" type shorthand, so "importance:Q" matched nothing
        # and the bars were never ordered by value.
        return alt.Chart(frame).mark_bar(color=color, opacity=0.6).encode(
            y=alt.Y("feature_names:N",
                    sort=alt.SortField(field=value_field,
                                       order="descending"),
                    title="Features"),
            x=alt.X(value_field + ":Q", title=value_title),
        ).properties(title=title, width=300, height=300)

    print("Plotting most important features from random forest...")
    rfr_plot = ranked_bars(feature_df, "importance", "green",
                           "Feature Importance",
                           "(A) Random Forest Regression")

    print("Plotting most important features from linear regression...")
    lr_plot = ranked_bars(lr_feature_df, "weights", "blue",
                          "Coefficient Weights", "(B) Linear Regression")

    feature_plot = rfr_plot | lr_plot

    # Saving plot as an output
    feature_plot.save(output + "feature_plot.png", webdriver="chrome")
def showCuisines(data, zipCode):
    """Bar chart of ``perZip`` per cuisine for a single zip code.

    Parameters:
        data: DataFrame with ``zipCode``, ``cuisine`` and ``perZip`` columns.
        zipCode: Value matched against the ``zipCode`` column.

    Returns:
        The altair chart.
    """
    rows = data[data['zipCode'] == zipCode]

    x_axis = alt.X("perZip:Q", axis=alt.Axis(title="Restaurant Count"))
    y_axis = alt.Y("cuisine:O",
                   axis=alt.Axis(title="Cuisine"),
                   sort=alt.SortField(field="perZip",
                                      op="sum",
                                      order='descending'))
    return alt.Chart(rows).mark_bar(stroke="Black").encode(
        x_axis, y_axis, alt.ColorValue("LightGrey"))
def plotZip(data, zip_filter):
    """Bar chart of restaurant totals per cuisine for one zip code.

    Each entry of the ``perZip`` column is expanded into columns with
    ``pd.Series``; the column named ``str(zip_filter)`` is kept, renamed to
    ``total`` and sorted in descending order.

    Parameters:
        data: DataFrame with ``cuisine`` and ``perZip`` columns.
            NOTE(review): ``perZip`` presumably holds one mapping of zip
            code to count per row - confirm against the caller.
        zip_filter: Zip code to plot; converted with ``str``.

    Returns:
        The altair chart. If the data cannot be prepared (for example the
        zip code is not present), a single all-NaN row is plotted.
    """
    try:
        zip_col = str(zip_filter)
        expanded = pd.concat(
            [data[['cuisine', 'perZip']], data.perZip.apply(pd.Series)],
            axis=1)
        data_perzip = (expanded[['cuisine', zip_col]]
                       .dropna()
                       .rename(index=str, columns={zip_col: "total"})
                       .sort_values(by=['total'], ascending=False))
    except Exception:
        # Best-effort fallback is kept, but KeyboardInterrupt/SystemExit
        # are no longer swallowed as they were by the bare ``except:``.
        data_perzip = pd.DataFrame({'cuisine': [np.nan], 'total': [np.nan]})

    return alt.Chart(data_perzip).mark_bar(stroke="Black").encode(
        alt.X("total:Q", axis=alt.Axis(title="Restaurants")),
        alt.Y('cuisine:O', sort=alt.SortField(field="total", op="argmax")),
        alt.ColorValue("LightGrey"),
    ).properties(width=200)
def createChart(data):
    """Bar chart of ``count`` per ``cuisine`` built from a list of records.

    Parameters:
        data: Non-empty sequence of records, each with ``cuisine`` and
            ``count`` entries.

    Returns:
        The altair chart.
    """
    # Evaluated as in the original but not used by the chart below.
    _first_count = int(data[0]["count"])

    frame = pd.DataFrame.from_records(data)
    x_axis = alt.X("count:Q", axis=alt.Axis(title="Number of Restaurants"))
    y_axis = alt.Y("cuisine:O",
                   axis=alt.Axis(title="cuisine"),
                   sort=alt.SortField(field="count",
                                      order="descending",
                                      op="mean"))
    return alt.Chart(frame).mark_bar(stroke="Black").encode(
        x_axis, y_axis, alt.ColorValue("LightGrey"))
Beispiel #16
0
def plot_hashtags(df, text_col):
    """
    Count the hashtags used in tweets and plot the most frequent ones.

    Parameters:
    -----------
    df : dataframe
        A dataframe of the user's tweets. A ``hashtags`` column holding
        the list of hashtags found in each tweet is added to it in place.

    text_col : string
        The column name of tweet text in dataframe.

    Returns:
    --------
    plot: chart
        A bar chart of the hashtags whose count rank is 15 or better.

    Raises:
    -------
    TypeError
        If ``df`` is not a dataframe or ``text_col`` is not a string.
    """
    from collections import Counter

    # Checking for valid inputs. TypeError is a subclass of Exception, so
    # callers that caught the previous bare Exception keep working.
    if not isinstance(df, pd.DataFrame):
        raise TypeError("The value of the argument 'df' must be type of dataframe.")
    if not isinstance(text_col, str):
        raise TypeError("The value of the argument 'text_col' must be type of string")

    # extract hashtags from text
    df['hashtags'] = df[text_col].apply(lambda x: re.findall(r'[#]\w+', x))

    # Count hashtags in first-seen order. The frame is built in one step:
    # DataFrame.append was removed in pandas 2.0 and re-copied the frame
    # on every call.
    counts = Counter(tag for tags in df["hashtags"] for tag in tags)
    hashtag_df = pd.DataFrame(list(counts.items()),
                              columns=['Keyword', 'Count'])

    # hashtag frequency plot; the axis titles now describe the field shown
    # on each axis (they were swapped before).
    hashtag_plot = alt.Chart(hashtag_df).mark_bar().encode(
        x=alt.X('Count', title="Count of Hashtags"),
        y=alt.Y('Keyword', title="Hashtags", sort='-x')
    ).properties(
        title='Top 15 Hashtag Words'
    ).transform_window(
        rank='rank(Count)',
        sort=[alt.SortField('Count', order='descending')]
    ).transform_filter((alt.datum.rank <= 15))

    return hashtag_plot
 def plotClassOrder(self):
     """Display a stacked bar chart of summed ``number`` per ``clasS``.

     Reads ``self.dataFrame``. Bars are coloured by ``type`` and their
     segments are stacked in ascending ``type`` order. The altair row
     limit is disabled before plotting.
     """
     alt.data_transformers.disable_max_rows()

     total_axis = alt.X('sum(number)',
                        type='quantitative',
                        title='Total Number')
     class_axis = alt.Y('clasS',
                        type='nominal',
                        title='Class',
                        sort=alt.SortField('number', order='descending'))
     # Sort the segments of the bars by this field
     segment_order = alt.Order('type', sort='ascending')

     bars = alt.Chart(self.dataFrame).mark_bar().encode(
         x=total_axis, y=class_axis, color='type', order=segment_order)
     bars.display()
Beispiel #18
0
 def chart_area_headcount(x, select_coding, tooltip_selection):
     """Stacked area chart of ``headcount`` per year-month.

     Parameters:
         x: Data with ``date``, ``headcount`` and ``order`` fields plus the
             field named by ``select_coding``.
         select_coding: Field used for colour; also the legend title.
         tooltip_selection: Passed through as the tooltip encoding.

     Returns:
         The altair chart. Colours and stacking both follow ``order``.
     """
     time_axis = alt.X('yearmonth(date):T',
                       axis=alt.Axis(title='date', labelAngle=90))
     area_color = alt.Color(select_coding,
                            legend=alt.Legend(title=select_coding),
                            sort=alt.SortField("order", "ascending"),
                            scale=alt.Scale(scheme='tableau10'))
     areas = alt.Chart(x).mark_area()
     return areas.encode(
         time_axis,
         y='headcount',
         color=area_color,
         tooltip=tooltip_selection,
         order="order:O",
     )
Beispiel #19
0
    def _createLatestText(self, lines, field):
        """Layer a value label and a tick mark on the newest data point.

        Both layers are derived from ``lines``. Rows are ranked by
        ``dateandtime`` in descending order; only the row with rank 1 gets
        visible text or a visible tick.

        Parameters:
            lines: Chart the two layers are derived from.
            field: Encoding shown as text for the newest row, formatted
                with ``.1f``.

        Returns:
            The layered altair chart.
        """
        alpha = 0.7

        def ranked_newest_first(chart):
            # Adds a ``rank`` column where 1 marks the latest timestamp.
            return chart.transform_window(
                rank='rank()',
                sort=[alt.SortField('dateandtime', order='descending')])

        text_marks = lines.mark_text(align='left',
                                     dx=30,
                                     fontSize=self.mark_text_font_size,
                                     opacity=alpha)
        label_layer = ranked_newest_first(text_marks).encode(
            text=alt.condition(alt.datum.rank == 1,
                               field,
                               alt.value(' '),
                               format='.1f'))

        tick_marks = lines.mark_tick(strokeDash=[1, 1],
                                     xOffset=15,
                                     size=15,
                                     thickness=2)
        tick_layer = ranked_newest_first(tick_marks).encode(
            opacity=alt.condition(alt.datum.rank == 1,
                                  alt.value(alpha),
                                  alt.value(0)))

        return alt.layer(label_layer, tick_layer)
Beispiel #20
0
    def plotWords(dfWords):
        """Bar chart of ``freq`` per ``word`` with a hover highlight.

        Parameters:
            dfWords: DataFrame with ``word`` and ``freq`` columns.

        Returns:
            The altair chart, words ordered by descending ``freq``.
        """
        hover_color = alt.ConditionalPredicateValueDef(
            "highlight._vgsid_==datum._vgsid_", "SteelBlue")
        hover = alt.selection_single(name="highlight",
                                     empty="all",
                                     on="mouseover")

        x_axis = alt.X("freq", axis=alt.Axis(title="Count"))
        y_axis = alt.Y('word:O',
                       axis=alt.Axis(title="Keyword"),
                       sort=alt.SortField(field="freq",
                                          op="max",
                                          order='descending'))
        bar_color = alt.ColorValue("LightGrey", condition=hover_color)

        bars = alt.Chart(dfWords).mark_bar(stroke="Black").encode(
            x_axis, y_axis, bar_color)
        return bars.properties(selection=hover)
Beispiel #21
0
def plotTraceAgg(traceType, traceDf):
    """Bar chart of the aggregated spans of one trace type.

    Parameters:
        traceType: Value matched against the ``root`` column; also
            appended to the chart title.
        traceDf: DataFrame of aggregated span rows.

    Returns:
        The altair chart. Each bar runs from ``Average loc start`` to
        ``Average loc end`` and is coloured by ``Error Rate``.
    """
    spans = traceDf[traceDf['root'] == traceType]

    span_axis = alt.Y('Resource Name',
                      title='Span Name',
                      type='nominal',
                      sort=alt.SortField('Average order'))
    start_axis = alt.X("Average loc start:Q", title="Duration")
    error_color = alt.Color('Error Rate:Q', sort='ascending')
    hover_fields = [
        'Average Duration:Q', 'Average Data Transfered:Q', 'Error Rate:Q'
    ]

    bars = alt.Chart(spans, title='Aggregate Trace: ' + traceType).mark_bar()
    return bars.encode(y=span_axis,
                       x=start_axis,
                       x2="Average loc end:Q",
                       color=error_color,
                       tooltip=hover_fields)
Beispiel #22
0
    def plot_bars(counts, n=20):
        '''
        Plot a bar chart of the ``n`` most common entries of ``counts``.

        Parameters:
            counts: Object with a ``most_common(n)`` method returning
                (category, count) pairs.
            n: Number of entries to plot.

        Returns:
            The altair chart, categories ordered by ascending count.
        '''
        import altair as alt
        import pandas as pd

        data = pd.DataFrame(counts.most_common(n),
                            columns=['category', 'count'])

        # Sort on the column that exists: the frame has 'category' and
        # 'count', so the previous sort field 'freq' matched nothing.
        chart = alt.Chart(data).mark_bar().encode(
            x='count',
            y=alt.Y('category',
                    sort=alt.SortField(field='count', order='ascending')))

        return chart
Beispiel #23
0
def createchart(data):
    """Light-grey bar chart of ``total`` per ``cuisine``.

    Parameters:
        data: Data with ``cuisine`` and ``total`` fields.

    Returns:
        The altair chart.
    """
    x_axis = alt.X("total:Q", axis=alt.Axis(title="Restaurants"))
    y_axis = alt.Y('cuisine:O',
                   sort=alt.SortField(field="total", op="argmax"))
    bars = alt.Chart(data).mark_bar(stroke="Black")
    return bars.encode(x_axis, y_axis, alt.ColorValue("LightGrey"))
Beispiel #24
0
def medal_rank(dataset, medal_type='Total'):
    """Bar chart of the countries ranked by a medal column.

    The ``medal_type`` column is summed per country, the sums are ranked
    in descending order and rows with a rank below 10 are kept.

    Parameters:
        dataset: Data with a ``Country`` field and one field per medal type.
        medal_type: Field to sum. ``'Gold'``, ``'Silver'`` and ``'Bronze'``
            also set a matching bar colour.

    Returns:
        The altair chart.
    """
    country_axis = alt.Y('Country:N', sort='-x')
    total_axis = alt.X('total:Q', scale=alt.Scale(domain=(0, 40)))

    chart = alt.Chart(dataset, title='2018 Medal Summary').mark_bar()
    chart = chart.encode(y=country_axis, x=total_axis)
    chart = chart.transform_joinaggregate(total=f'sum({medal_type})',
                                          groupby=['Country'])
    chart = chart.transform_window(
        rank='rank(total)',
        sort=[alt.SortField('total', order='descending')])
    chart = chart.transform_filter(alt.datum.rank < 10)

    bar_colors = (('Gold', 'gold'), ('Silver', 'silver'),
                  ('Bronze', 'darkorange'))
    for medal, bar_color in bar_colors:
        if medal_type == medal:
            return chart.mark_bar(color=bar_color)
    return chart
Beispiel #25
0
def showRatingDistribution(df, name=''):
    """Bar chart of the 15 largest values of column ``name`` per cuisine.

    Parameters:
        df: DataFrame with a ``cuisine`` column and the column ``name``.
        name: Column to sort by and plot on the x axis.

    Returns:
        The altair chart with a hover highlight. If sorting by ``name``
        raises ``KeyError``, an empty bar chart is returned.
    """
    column = name

    try:
        top = df.sort_values(by=column, ascending=False).head(15)
    except KeyError:
        return alt.Chart(pd.DataFrame()).mark_bar()

    # The hovered bar is recoloured through the "highlight" selection.
    color_condition = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")

    return alt.Chart(top) \
        .mark_bar(stroke="Black") \
        .encode(
            # Axis title spelling corrected (was "Restuarants").
            alt.X(column + ":Q", axis=alt.Axis(title="Restaurants")),
            alt.Y('cuisine:O', axis=alt.Axis(title="Cuisine"),
                  sort=alt.SortField(field=column, op="max",
                                     order='descending')),
            alt.ColorValue("LightGrey", condition=color_condition),
        ).properties(
            selection=highlight_selection
        )
Beispiel #26
0
def createChart(data, zip):
    """Bar chart of ``total`` per ``cuisine`` with a hover highlight.

    Parameters:
        data: Data with ``cuisine`` and ``total`` fields.
        zip: Not used by this function.

    Returns:
        The altair chart.
    """
    hover = alt.selection_single(name="highlight",
                                 on="mouseover",
                                 empty="none")
    hover_color = alt.ConditionalPredicateValueDef(
        "(highlight._vgsid_==datum._vgsid_)", "SteelBlue")

    x_axis = alt.X("total:Q", axis=alt.Axis(title="Restaurants"))
    y_axis = alt.Y('cuisine:O',
                   sort=alt.SortField(field="total", op="argmax"))
    bar_color = alt.ColorValue("LightGrey", condition=hover_color)

    bars = alt.Chart(data).mark_bar(stroke="Black").encode(
        x_axis, y_axis, bar_color)
    return bars.properties(selection=hover)
Beispiel #27
0
def createChart(data, name):
    """Two linked bar charts: mean rating per name and rating counts.

    The left chart shows the mean ``rating`` per ``name``. Selecting a bar
    there filters the right chart, which counts rows per rating value.

    Parameters:
        data: DataFrame with ``name`` and ``rating`` columns.
        name: Label inserted into the axis and chart titles.

    Returns:
        The horizontally concatenated altair chart.
    """
    hover_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")
    hover = alt.selection_single(name="highlight",
                                 empty="all",
                                 on="mouseover")
    # encodings=['y'] makes the selection carry the y field of the bar.
    picked = alt.selection_single(name="rating",
                                  empty="all",
                                  encodings=['y'])
    # Largest number of rows sharing one rating; upper bound of the
    # count axis.
    tallest = int(data['rating'].value_counts().max())

    mean_bars = alt.Chart().mark_bar(stroke="Black").encode(
        alt.X("mean(rating):Q", axis=alt.Axis(title="Rating")),
        alt.Y('name:O',
              axis=alt.Axis(title=f"{name} App Name"),
              sort=alt.SortField(field="rating",
                                 op="mean",
                                 order='descending')),
        alt.ColorValue("LightGrey", condition=hover_color),
    ).properties(selection=hover + picked)

    count_bars = alt.Chart().mark_bar(stroke="Black").encode(
        alt.X("rating:O",
              axis=alt.Axis(title="Rating"),
              scale=alt.Scale(type='band', domain=list(range(1, 6)))),
        alt.Y("count()",
              axis=alt.Axis(title="Number of Ratings"),
              scale=alt.Scale(domain=(0, tallest))),
    ).properties(selection=hover).transform_filter(picked.ref())

    return alt.hconcat(mean_bars,
                       count_bars,
                       data=data,
                       title=f"{name} App Ratings")
Beispiel #28
0
def external_causes_category_chart(data):
    """Bar chart of the ten most frequent external causes of death.

    Keeps the rows whose ``chapter`` is 'External causes of morbidity and
    mortality', sums ``count`` per ``cause_of_death_map`` and plots the ten
    largest sums.

    Parameters:
        data: DataFrame with ``chapter``, ``cause_of_death_map`` and
            ``count`` columns.

    Returns:
        The altair chart (the chart was previously built but not returned).
    """
    ed = data[data['chapter'] == 'External causes of morbidity and mortality']

    causeCounts = (
        ed
        .groupby("cause_of_death_map")
        .agg({"count": "sum"})
        .reset_index()
        .sort_values("count", ascending=False)
        .head(10)
    )

    return alt.Chart(causeCounts).mark_bar(color="orange").encode(
        # Sort causes by their summed count. The previous
        # SortField("sum", "count", order=...) call passed the order twice
        # and named no valid field.
        y=alt.Y("cause_of_death_map",
                sort=alt.EncodingSortField(field="count", op="sum",
                                           order="descending")),
        x=alt.X("count"),
        tooltip=causeCounts.columns.tolist()
    ).properties(
        width=800,
        height=400
    )
Beispiel #29
0
 def _make_barplot(
     self,
     db_name,
     enrich_result_df,
     pvalue_thresh,
     ranking_field,
     color_field,
     tooltip_fields,
     color_scheme,
     with_href,
     size,
 ):
     """Bar chart of the enrichment terms below a p-value threshold.

     Parameters:
         db_name: Chart title.
         enrich_result_df: DataFrame with ``adjusted_pvalue`` and
             ``term_name`` columns plus the ranking and colour fields.
         pvalue_thresh: Rows with ``adjusted_pvalue`` below this are kept.
         ranking_field: Field on the x axis; also orders the terms.
         color_field: Quantitative field mapped to bar colour.
         tooltip_fields: Passed through as the tooltip encoding.
         color_scheme: Name of the colour scheme.
         with_href: When truthy, bars link to a web search for the term.
         size: When truthy, only the ``size`` rows with the largest
             ``ranking_field`` values are kept.

     Returns:
         The altair chart.
     """
     significant = enrich_result_df.query(
         f"adjusted_pvalue < {pvalue_thresh}")
     if size:
         # Ascending sort followed by tail() keeps the largest values.
         significant = significant.sort_values(f"{ranking_field}").tail(size)

     term_axis = alt.Y(
         "term_name:N",
         sort=alt.SortField(field=ranking_field, order="descending"),
         title=None,
     )
     bar_color = alt.Color(
         f"{color_field}:Q",
         scale=alt.Scale(scheme=color_scheme),
         sort="descending",
     )
     chart = alt.Chart(significant).mark_bar().encode(
         x=f"{ranking_field}:Q",
         y=term_axis,
         color=bar_color,
         tooltip=tooltip_fields,
     ).properties(title=db_name)

     if not with_href:
         return chart
     return chart.encode(href="url:N").transform_calculate(
         url="https://www.google.com/search?q=" + alt.datum.term_name)
def get_none_accepted_graph():
    """Return a chart of nations whose asylum requests were never granted.

    Keeps nations with zero granted requests and a non-zero number of
    submitted requests, and shows the ten with the most submitted requests
    as labelled bars.

    Parameters: None

    Returns: The altair chart (a horizontal concatenation of one chart).

    """
    requests_col = 'Total Asylgesuche pro Land'

    df_nation = get_nations_df()
    # Pre-sort retained from the original so rows tied on the final sort
    # key start from the same relative order as before.
    by_ratio = df_nation.sort_values(
        'Verhältnis der Asylgewährungen zu Asylgesuchen')

    # One boolean mask replaces the former in-place drop on a filtered
    # slice (which pandas flags with SettingWithCopyWarning). The unused
    # 'Abschreibungen' ranking was removed.
    never_granted = by_ratio['Total Asylgewährungen pro Land'] == 0
    has_requests = by_ratio[requests_col] != 0
    top_rejected = by_ratio[never_granted & has_requests].sort_values(
        requests_col, ascending=False).iloc[:10]

    chart_always_declined = alt.Chart(top_rejected).mark_bar().encode(
        x='Total Asylgesuche pro Land:Q',
        y=alt.Y('Nation:O',
                sort=alt.SortField(field=requests_col, order='descending')))

    # Text marks placed just right of each bar end.
    text_always_declined = chart_always_declined.mark_text(
        align='left', baseline='middle',
        dx=3).encode(text='Total Asylgesuche pro Land:Q')
    return alt.hconcat(
        (chart_always_declined + text_always_declined).properties(
            width=250, height=150, title='Keine angenommenen Gesuche'))