예제 #1
0
def createChart(data, zipcode):
    """Build a bar chart of the 20 largest 'num' rows for one zipcode.

    Args:
        data (DataFrame): frame indexed by the columns 'zipcode', 'num'
            and 'name'.
        zipcode: value compared for equality against the 'zipcode' column.

    Returns:
        Chart: an Altair bar chart with 'num' on X and 'name' on Y. The
        chart carries a mouseover selection named "highlight" that drives
        the conditional bar color.
    """
    top_rows = data[data['zipcode'] == zipcode].nlargest(20, 'num')
    try:
        upper_bound = int(top_rows['num'].max())
    except ValueError:
        # int() could not convert the maximum (presumably NaN because no
        # row matched): fall back to a single placeholder bar.
        upper_bound = 10
        top_rows = pd.DataFrame([{"name": "undefine", "num": 0}])

    # Bars are LightGrey unless the predicate on the "highlight" selection
    # holds, in which case they are drawn in SteelBlue.
    hover = alt.selection_single(name="highlight", empty="all",
                                 on="mouseover")
    hovered_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")

    x_channel = alt.X("num:Q",
                      axis=alt.Axis(title="Restaurants"),
                      scale=alt.Scale(domain=(0, upper_bound)))
    y_channel = alt.Y('name:O',
                      axis=alt.Axis(title="cuisine"),
                      sort=alt.SortField(field="num", op="argmax"))
    bar_color = alt.ColorValue("LightGrey", condition=hovered_color)

    bars = alt.Chart(top_rows).mark_bar(stroke="Black")
    return bars.encode(x_channel, y_channel, bar_color).properties(
        selection=hover)
예제 #2
0
def createChart(data, name=''):
    """Build a one-panel concatenated bar chart of 'rating' per 'name'.

    Args:
        data (DataFrame): data attached to the concatenated chart; the
            encodings reference the fields 'rating' and 'name'.
        name (str): text interpolated into the chart title.

    Returns:
        Chart: an Altair horizontal concatenation holding the bar chart.
    """
    hover = alt.selection_single(name="highlight", empty="all",
                                 on="mouseover")
    # LightGrey by default, SteelBlue when the highlight predicate holds.
    hovered_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")

    x_channel = alt.X("rating:Q",
                      axis=alt.Axis(title="The number of restaurants"))
    y_channel = alt.Y('name:O',
                      axis=alt.Axis(title="Cuisines"),
                      sort=alt.SortField(field="rating", op="mean",
                                         order='descending'))
    bar_color = alt.ColorValue("LightGrey", condition=hovered_color)

    # The inner chart has no data of its own; it is supplied via hconcat.
    bars = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(x_channel, y_channel, bar_color)
        .properties(selection=hover)
    )

    chart_title = (
        "The number of restaurants ({} in NYC) - Top 25 cuisines".format(name))
    return alt.hconcat(bars, data=data, title=chart_title)
예제 #3
0
def createChart(data, zipcode):
    """Build a bar chart of the 20 largest per-zipcode cuisine counts.

    Args:
        data (DataFrame): frame with a 'cuisine' column and one
            'perZip<zipcode>' column per zip code.
        zipcode: zip code whose 'perZip<zipcode>' column is plotted. It is
            converted with str(), so non-string values are accepted.

    Returns:
        Chart: an Altair bar chart with the per-zip column on X and
        'cuisine' on Y. When the column is missing, or no maximum can be
        computed from it, a single zero-valued placeholder bar is drawn.
    """
    # Build the column name once; it is needed for lookup, fallback data,
    # the X encoding and the sort field.
    column = 'perZip' + str(zipcode)

    color_expression = "highlight._vgsid_==datum._vgsid_"
    color_condition = alt.ConditionalPredicateValueDef(color_expression,
                                                       "SteelBlue")
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")
    try:
        data = pd.concat([data['cuisine'], data[column]],
                         axis=1).nlargest(20, column)
        maxCount = int(data[column].max())
    except (KeyError, ValueError):
        # KeyError: the zip code (or 'cuisine') column does not exist.
        # ValueError: int() could not convert the maximum, e.g. NaN from
        # an empty frame.
        maxCount = 1
        data = pd.DataFrame([{"cuisine": "", column: 0}])

    return alt.Chart(data) \
              .mark_bar(stroke="Black") \
              .encode(
                  alt.X(column + ':Q', axis=alt.Axis(title="Restaurants"),
                        scale=alt.Scale(domain=(0, maxCount))),
                  alt.Y('cuisine:O', axis=alt.Axis(title="cuisine"),
                        sort=alt.SortField(field=column, op="argmax")),
                  alt.ColorValue("LightGrey", condition=color_condition),
              ).properties(
                  selection=highlight_selection,
              )
예제 #4
0
def draw_chart(data, zipcode):
    """Build a bar chart of the 25 largest per-zipcode cuisine counts.

    Args:
        data (DataFrame): frame with a 'cuisine' column and one
            'perZip<zipcode>' column per zip code.
        zipcode: zip code whose 'perZip<zipcode>' column is plotted. It is
            converted with str(), so non-string values are accepted.

    Returns:
        Chart: an Altair bar chart with the per-zip column on X and
        'cuisine' on Y. When the column is missing, or no maximum can be
        computed from it, a single zero-valued placeholder bar is drawn.
    """
    # Build the column name once; it is needed for lookup, fallback data,
    # the X encoding and the sort field.
    column = "perZip" + str(zipcode)

    # Color expression: highlights the bar on mouseover.
    color_expression = "highlight._vgsid_==datum._vgsid_"
    color_condition = alt.ConditionalPredicateValueDef(color_expression,
                                                       "SteelBlue")

    # Selection that tracks the bar the mouse is hovering over.
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")
    try:
        data = pd.concat([data["cuisine"], data[column]],
                         axis=1).nlargest(25, column)
        maxCount = int(data[column].max())
    except (KeyError, ValueError):
        # KeyError: the zip code (or 'cuisine') column does not exist.
        # ValueError: int() could not convert the maximum, e.g. NaN from
        # an empty frame.
        maxCount = 1
        data = pd.DataFrame([{"cuisine": "", column: 0}])

    return alt.Chart(data) \
              .mark_bar(stroke="Black") \
              .encode(
                  alt.X(column + ":Q",
                        axis=alt.Axis(title="Number of Restaurants"),
                        scale=alt.Scale(domain=(0, maxCount))),
                  alt.Y("cuisine:O", axis=alt.Axis(title="Cuisine Type"),
                        sort=alt.SortField(field=column, op="argmax")),
                  alt.ColorValue("LightGrey", condition=color_condition),
              ).properties(
                  selection=highlight_selection,
              )
예제 #5
0
    def plotWords(dfWords):
        """Build a bar chart of keyword frequencies.

        Args:
            dfWords: data passed to alt.Chart; the encodings reference the
                fields 'freq' and 'word'.

        Returns:
            Chart: an Altair bar chart with 'freq' on X and 'word' on Y,
            sorted by max 'freq' in descending order.
        """
        hover = alt.selection_single(name="highlight", empty="all",
                                     on="mouseover")
        # LightGrey by default, SteelBlue when the highlight predicate holds.
        hovered_color = alt.ConditionalPredicateValueDef(
            "highlight._vgsid_==datum._vgsid_", "SteelBlue")

        x_channel = alt.X("freq", axis=alt.Axis(title="Count"))
        y_channel = alt.Y('word:O',
                          axis=alt.Axis(title="Keyword"),
                          sort=alt.SortField(field="freq", op="max",
                                             order='descending'))
        bar_color = alt.ColorValue("LightGrey", condition=hovered_color)

        return (
            alt.Chart(dfWords)
            .mark_bar(stroke="Black")
            .encode(x_channel, y_channel, bar_color)
            .properties(selection=hover)
        )
예제 #6
0
def showRatingDistribution(df, name=''):
    """Build a bar chart of the 15 largest values in column `name`.

    Args:
        df (DataFrame): frame with a 'cuisine' column and the value column
            selected by `name`.
        name (str): name of the column to sort by and plot on the X axis.

    Returns:
        Chart: an Altair bar chart with the `name` column on X and
        'cuisine' on Y. If sorting by `name` raises KeyError, an empty bar
        chart is returned instead.
    """
    column = name

    try:
        top_rows = df.sort_values(by=column, ascending=False).head(15)
    except KeyError:
        # Unknown column: nothing to plot, so hand back an empty chart.
        return alt.Chart(pd.DataFrame()).mark_bar()

    # Color expression: highlights the bar under the mouse.
    color_expression = "highlight._vgsid_==datum._vgsid_"
    color_condition = alt.ConditionalPredicateValueDef(
        color_expression, "SteelBlue")

    # Selection that tracks the bar the mouse is hovering over.
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")

    return alt.Chart(top_rows) \
        .mark_bar(stroke="Black") \
        .encode(
            alt.X(column + ":Q", axis=alt.Axis(title="Restaurants")),
            alt.Y('cuisine:O', axis=alt.Axis(title="Cuisine"),
                  sort=alt.SortField(field=column, op="max",
                                     order='descending')),
            alt.ColorValue("LightGrey", condition=color_condition),
        ).properties(
            selection=highlight_selection
        )
예제 #7
0
def createChart(data, zip):
    """Build a bar chart of 'total' per 'cuisine' with hover highlighting.

    Args:
        data: data passed to alt.Chart; the encodings reference the fields
            'total' and 'cuisine'.
        zip: not used by this function.

    Returns:
        Chart: an Altair bar chart with 'total' on X and 'cuisine' on Y.
    """
    hover = alt.selection_single(name="highlight", on="mouseover",
                                 empty="none")
    # LightGrey by default, SteelBlue when the highlight predicate holds.
    hovered_color = alt.ConditionalPredicateValueDef(
        "(highlight._vgsid_==datum._vgsid_)", "SteelBlue")

    x_channel = alt.X("total:Q", axis=alt.Axis(title="Restaurants"))
    y_channel = alt.Y('cuisine:O',
                      sort=alt.SortField(field="total", op="argmax"))
    bar_color = alt.ColorValue("LightGrey", condition=hovered_color)

    return (
        alt.Chart(data)
        .mark_bar(stroke="Black")
        .encode(x_channel, y_channel, bar_color)
        .properties(selection=hover)
    )
예제 #8
0
def createChart(data, name):
    """Build two side-by-side bar charts of app ratings.

    The left chart shows the mean 'rating' per 'name'. The right chart is
    a count histogram of 'rating', filtered by the "rating" selection that
    is defined on the left chart.

    Args:
        data (DataFrame): frame with the columns 'name' and 'rating'.
        name (str): text interpolated into the Y-axis title and the
            overall chart title.

    Returns:
        Chart: an Altair horizontal concatenation of the two bar charts.
    """
    # Largest number of rows sharing one rating value; used as the upper
    # end of the histogram's Y scale.
    top_count = int(data['rating'].value_counts().max())

    hover = alt.selection_single(name="highlight", empty="all",
                                 on="mouseover")
    picked_row = alt.selection_single(name="rating", empty="all",
                                      encodings=['y'])
    # LightGrey by default, SteelBlue when the highlight predicate holds.
    hovered_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")

    # Left chart: mean rating per name, carrying both selections.
    mean_chart = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(
            alt.X("mean(rating):Q", axis=alt.Axis(title="Rating")),
            alt.Y('name:O',
                  axis=alt.Axis(title="{} App Name".format(name)),
                  sort=alt.SortField(field="rating", op="mean",
                                     order='descending')),
            alt.ColorValue("LightGrey", condition=hovered_color),
        )
        .properties(selection=hover + picked_row)
    )

    # Right chart: rating histogram over a fixed 1..5 band scale, filtered
    # by the "rating" selection.
    histogram = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(
            alt.X("rating:O", axis=alt.Axis(title="Rating"),
                  scale=alt.Scale(type='band', domain=[1, 2, 3, 4, 5])),
            alt.Y("count()", axis=alt.Axis(title="Number of Ratings"),
                  scale=alt.Scale(domain=(0, top_count))),
        )
        .properties(selection=hover)
        .transform_filter(picked_row.ref())
    )

    chart_title = "{} App Ratings".format(name)
    return alt.hconcat(mean_chart, histogram, data=data, title=chart_title)
예제 #9
0
def createChart(data, zip):
    """Build a bar chart of 'total' per 'cuisine' with hover and search.

    A bar is drawn in SteelBlue when the color predicate holds: either the
    lower-cased 'cuisine' value contains the "search" selection's 'term',
    or the bar matches the "highlight" selection.

    Args:
        data: data passed to alt.Chart; the encodings reference the fields
            'total' and 'cuisine'.
        zip: not used by this function.

    Returns:
        Chart: an Altair bar chart carrying both selections.
    """
    hover = alt.selection_single(name="highlight", on="mouseover",
                                 empty="none")
    # The search selection is bound to a generic 'input' widget and
    # exposes its value through the 'term' field.
    search_box = alt.selection_single(name="search",
                                      on="mouseover",
                                      empty="none",
                                      fields=["term"],
                                      bind=alt.VgGenericBinding('input'))

    matched_color = alt.ConditionalPredicateValueDef(
        "(indexof(lower(datum.cuisine), search.term)>=0) || (highlight._vgsid_==datum._vgsid_)",
        "SteelBlue")

    x_channel = alt.X("total:Q", axis=alt.Axis(title="Restaurants"))
    y_channel = alt.Y('cuisine:O',
                      sort=alt.SortField(field="total", op="argmax"))
    bar_color = alt.ColorValue("LightGrey", condition=matched_color)

    return (
        alt.Chart(data)
        .mark_bar(stroke="Black")
        .encode(x_channel, y_channel, bar_color)
        .properties(selection=hover + search_box)
    )
예제 #10
0
def showRatingDistribution(data, name=''):
    """Create an interactive visualization of hotspot counts plus a map.

    Args:
        data (DataFrame): the input data frame; this function reads the
            columns 'BORO', 'PROVIDER', 'LON' and 'LAT'.
        name (str): text (optional) interpolated into the bar-chart title.

    Return:
        Chart: an Altair chart object. Two vertically stacked bar charts
        sit on the left and a layered map with points on the right.
    """
    ## Upper ends of the two count scales: the size of the largest group
    ## in each of the 'BORO' and 'PROVIDER' columns.
    boro_top = int(data['BORO'].value_counts().max())
    provider_top = int(data['PROVIDER'].value_counts().max())

    ## Color condition for highlighting the bar under the mouse.
    hovered_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")

    ## (1) A selection for highlighting a bar when the mouse hovers over it.
    hover = alt.selection_single(name="highlight", empty="all",
                                 on="mouseover")

    ## (2) A selection named "PROVIDER" on the 'y' encoding channel. Its
    ## ref() is used below to filter the second bar chart and the points.
    picked_row = alt.selection_single(name="PROVIDER", empty="all",
                                      encodings=['y'])

    ## First bar chart: number of records per 'BORO', carrying the compound
    ## selection built by adding the two selections together.
    boro_y = alt.Y('BORO:O',
                   axis=alt.Axis(title="Location of Hotspot"),
                   sort=alt.SortField(field="BORO", op="count",
                                      order='descending'))
    boro_x = alt.X("count()",
                   axis=alt.Axis(title="Number of Hotspot"),
                   scale=alt.Scale(domain=(0, boro_top)))
    boro_chart = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(boro_y, boro_x,
                alt.ColorValue("LightGrey", condition=hovered_color))
        .properties(selection=hover + picked_row)
    )

    ## Second bar chart: number of records per 'PROVIDER', filtered by the
    ## selection made on the first chart.
    provider_x = alt.X("PROVIDER:O",
                       axis=alt.Axis(title="PROVIDER"),
                       sort=alt.SortField(field="PROVIDER", op="count",
                                          order='descending'))
    provider_y = alt.Y("count()",
                       axis=alt.Axis(title="Number of Hotspot"),
                       scale=alt.Scale(domain=(0, provider_top)))
    provider_chart = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(provider_x, provider_y, alt.ColorValue("LightGrey"))
        .properties(selection=hover)
        .transform_filter(picked_row.ref())
    )

    ## Map background: shapes loaded from the nyc_zip.geojson URL.
    geojson_url = "https://raw.githubusercontent.com/hvo/datasets/master/nyc_zip.geojson"
    base_map = (
        alt.Chart(geojson_url)
        .mark_geoshape(fill='lightgray', stroke='white')
        .properties(title='Map', width=500, height=500)
        .project('albersUsa')
    )

    ## One point per record at ('LON', 'LAT'), filtered by the same
    ## selection as the second bar chart.
    hotspots = (
        alt.Chart(data)
        .mark_point(filled=True, size=200)
        .encode(longitude='LON:Q',
                latitude='LAT:Q',
                color=alt.value('SteelBlue'),
                size=alt.value(30))
        .transform_filter(picked_row.ref())
    )

    ## Stack the bar charts vertically, then put the layered map beside them.
    bar_column = alt.vconcat(boro_chart,
                             provider_chart,
                             data=data,
                             title="{} Hotspot Distribution".format(name))
    return alt.hconcat(bar_column, (base_map + hotspots), data=data)
예제 #11
0
def showRatingDistribution(data, name=''):
    """Build an interactive pair of bar charts describing app ratings.

    Args:
        data (DataFrame): the input data frame; it must contain at least
            the two columns 'name' and 'rating'.
        name (str): text (optional) interpolated into the Y-axis title and
            the overall chart title.

    Return:
        Chart: an Altair horizontal concatenation of a mean-rating bar
        chart and a rating-count histogram.
    """
    # Largest number of rows sharing one rating value. It bounds the
    # histogram's Y scale and caps the tick count of the mean-rating axis.
    top_count = int(data['rating'].value_counts().max())

    # Selection (1): tracks the bar the mouse is hovering over.
    hover = alt.selection_single(name="highlight", empty="all",
                                 on="mouseover")
    # Selection (2): defined on the 'y' encoding channel; its ref() is the
    # filter condition of the histogram below.
    picked_app = alt.selection_single(name="rating", empty="all",
                                      encodings=['y'])

    # LightGrey by default, SteelBlue when the highlight predicate holds.
    hovered_color = alt.ConditionalPredicateValueDef(
        "highlight._vgsid_==datum._vgsid_", "SteelBlue")

    # Left chart: mean rating per app name, carrying both selections.
    mean_x = alt.X("mean(rating):Q",
                   axis=alt.Axis(title="Average Rating",
                                 tickCount=min(top_count, 6)))
    mean_y = alt.Y('name:O',
                   axis=alt.Axis(title="{} App Name".format(name)),
                   sort=alt.EncodingSortField(field="rating", op="mean",
                                              order='descending'))
    mean_chart = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(mean_x, mean_y,
                alt.ColorValue("LightGrey", condition=hovered_color))
        .add_selection(hover)
        .add_selection(picked_app)
    )

    # Right chart: histogram of ratings, filtered by the selection made on
    # the left chart. The X scale is an explicit 1..5 band domain, so
    # ratings with no rows still get a slot on the axis.
    hist_x = alt.X("rating:O",
                   axis=alt.Axis(title="Rating"),
                   scale=alt.Scale(type='band', domain=[1, 2, 3, 4, 5]))
    hist_y = alt.Y("count()",
                   axis=alt.Axis(title="Number of Ratings"),
                   scale=alt.Scale(domain=(0, top_count)))
    histogram = (
        alt.Chart()
        .mark_bar(stroke="Black")
        .encode(hist_x, hist_y, alt.ColorValue("LightGrey"))
        .add_selection(hover)
        .transform_filter(picked_app.ref())
    )

    # Place the two charts side by side over the shared data.
    chart_title = "{} App Ratings".format(name)
    return alt.hconcat(mean_chart, histogram, data=data, title=chart_title)