Exemple #1
0
def explore_dataset(users, movies, ratings):
    """Print summary tables and open interactive charts for the MovieLens data.

    Relies on helpers that are not defined in this block (``filtered_hist``,
    ``get_movie_ratings``, ``filter_and_chart``) and on a ``flatten_cols``
    method being available on DataFrames.

    Args:
      users: User DataFrame; merged with the per-user rating statistics on
        ``user_id`` and sliced by ``occupation`` in the charts.
      movies: Movie DataFrame, passed to ``get_movie_ratings``.
      ratings: Ratings DataFrame; grouped by ``user_id`` to aggregate the
        ``rating`` column.
    """
    # Exploring the MovieLens Data (Users)
    print(users.describe())  # User features
    # `np.object` was removed in NumPy 1.24 and raises AttributeError there;
    # the builtin `object` selects the same object-dtype columns.
    print(users.describe(include=[object]))  # Categorical user features

    # Create filters to slice the data
    occupation_filter = alt.selection_multi(fields=["occupation"])
    occupation_chart = alt.Chart().mark_bar().encode(
        x="count()",
        y=alt.Y("occupation:N"),
        color=alt.condition(
            occupation_filter,
            alt.Color("occupation:N", scale=alt.Scale(scheme='category20')),
            alt.value("lightgray")),
    ).properties(width=300, height=300, selection=occupation_filter)

    # Per-user rating count and mean, joined with the user features.
    users_ratings = (ratings.groupby('user_id', as_index=False).agg({
        'rating': ['count', 'mean']
    }).flatten_cols().merge(users, on='user_id'))

    # Create a chart for the count, and one for the mean.
    altair_viewer.show(
        alt.hconcat(filtered_hist('rating count', '# ratings / user',
                                  occupation_filter),
                    filtered_hist('rating mean', 'mean user rating',
                                  occupation_filter),
                    occupation_chart,
                    data=users_ratings))

    # Exploring the MovieLens Data (Movies)
    movies_ratings = get_movie_ratings(movies, ratings)

    genre_filter, genre_chart = filter_and_chart()

    rating_columns = ['title', 'rating count', 'rating mean']

    # Ten movies with the most ratings. A bare expression only displays in a
    # notebook cell; inside a function the table has to be printed.
    print(movies_ratings[rating_columns]
          .sort_values('rating count', ascending=False)
          .head(10))

    # NOTE(review): `mask(key, function)` does not match pandas' built-in
    # `DataFrame.mask(cond, other)`; presumably the project patches in a row
    # filter, which would make this the ten best-rated movies with more than
    # 20 ratings -- confirm the patch is installed before calling.
    print(movies_ratings[rating_columns]
          .mask('rating count', lambda x: x > 20)
          .sort_values('rating mean', ascending=False)
          .head(10))

    # Display the number of ratings and average rating per movie
    altair_viewer.show(
        alt.hconcat(filtered_hist('rating count', '# ratings / movie',
                                  genre_filter),
                    filtered_hist('rating mean', 'mean movie rating',
                                  genre_filter),
                    genre_chart,
                    data=movies_ratings))
def movie_embedding_norm(models):
    """Show embedding norm against number of ratings for each model's movies.

    One scatter plot is built per model and the plots are concatenated
    horizontally. Reads the module-level ``movies`` and ``movies_ratings``
    frames.

    Args:
      models: A single model or a list of models. Each must expose
        ``embeddings["movie_id"]`` as a 2-D array whose row-wise norm is
        stored as a column next to the movie titles (presumably one row per
        movie, aligned with ``movies`` -- TODO confirm).

    Returns:
      Whatever ``altair_viewer.show`` returns for the combined chart.
    """
    model_list = models if isinstance(models, list) else [models]
    frame = pd.DataFrame({
        'title': movies['title'],
        'genre': movies['genre'],
        'num_ratings': movies_ratings['rating count'],
    })
    panels = []
    # A single brush is shared by every panel.
    brush = alt.selection_interval()
    for index, model in enumerate(model_list):
        column = f'norm{index}'
        frame[column] = np.linalg.norm(model.embeddings["movie_id"], axis=1)

        # Hover selection that picks the point nearest to the cursor.
        hover = alt.selection(
            type='single', encodings=['x', 'y'], on='mouseover', nearest=True,
            empty='none')

        dot_color = alt.condition(
            brush, alt.value('#4c78a8'), alt.value('lightgray'))
        dots = alt.Chart().mark_circle().encode(
            x='num_ratings', y=column, color=dot_color)
        dots = dots.properties(selection=hover).add_selection(brush)

        # Only the hovered point gets its title drawn.
        label_text = alt.condition(hover, 'title', alt.value(''))
        labels = alt.Chart().mark_text(align='center', dx=5, dy=-5).encode(
            x='num_ratings', y=column, text=label_text)

        panels.append(alt.layer(dots, labels))
    return altair_viewer.show(alt.hconcat(*panels, data=frame))
Exemple #3
0
def bias_line_chart(data3):
    """Show an interactive line chart of ``bias_squared`` against ``variance``.

    Args:
      data3: Data source handed to ``alt.Chart``; it must provide the
        ``variance`` and ``bias_squared`` fields.

    Returns:
      Whatever ``altair_viewer.show`` returns.
    """
    line = alt.Chart(data3).mark_line()
    line = line.encode(x='variance', y='bias_squared')
    return altair_viewer.show(line.interactive())
Exemple #4
0
def test_bar(data):
    """Show an interactive bar chart of field ``y`` against field ``x``.

    Args:
      data: Data source handed to ``alt.Chart``; it must provide the ``x``
        and ``y`` fields.

    Returns:
      Whatever ``altair_viewer.show`` returns.
    """
    bars = alt.Chart(data).mark_bar()
    bars = bars.encode(x='x', y='y')
    return altair_viewer.show(bars.interactive())
Exemple #5
0
def basic_bar(data2):
    """Show an interactive bar chart of ``num_oscars`` per movie.

    Bars are coloured by the ``movies`` field.

    Args:
      data2: Data source handed to ``alt.Chart`` (a DataFrame or another
        source Altair accepts) with ``movies`` and ``num_oscars`` fields.

    Returns:
      Whatever ``altair_viewer.show`` returns.
    """
    channels = {'x': 'movies', 'y': 'num_oscars', 'color': 'movies'}
    bars = alt.Chart(data2).mark_bar().encode(**channels)
    return altair_viewer.show(bars.interactive())
Exemple #6
0
def bar_chart(data2):
    """Show a bar chart of ``num_oscars`` per movie with custom axis titles.

    Same chart as a plain movies/num_oscars bar chart, but with explicit
    axis titles and a fixed width of 200.

    Args:
      data2: Data source handed to ``alt.Chart`` with ``movies`` and
        ``num_oscars`` fields.

    Returns:
      Whatever ``altair_viewer.show`` returns.
    """
    x_axis = alt.X('movies', title="My Favorite Movies")
    y_axis = alt.Y('num_oscars', title="# of Academy Awards")
    bars = alt.Chart(data2).mark_bar().encode(x_axis, y_axis, color='movies')
    sized = bars.properties(width=200)
    return altair_viewer.show(sized.interactive())
def visualize_movie_embeddings(data, x, y):
    """Show a 600x600 scatter plot of movies next to the genre chart.

    Points are coloured by ``genre`` when selected by the module-level
    ``genre_filter`` and drawn in whitesmoke otherwise; hovering a point
    shows its ``title``. The module-level ``genre_chart`` is placed to the
    right of the scatter plot.

    Args:
      data: Data source shared by all sub-charts; it must provide the
        ``genre`` and ``title`` fields plus the fields named by x and y.
      x: Encoding for the horizontal axis.
      y: Encoding for the vertical axis.

    Returns:
      Whatever ``altair_viewer.show`` returns.
    """
    # Hover selection that picks the point nearest to the cursor.
    hover = alt.selection(
        type='single', encodings=['x', 'y'], on='mouseover', nearest=True,
        empty='none')

    genre_color = alt.condition(genre_filter, "genre", alt.value("whitesmoke"))
    points = alt.Chart().mark_circle().encode(x=x, y=y, color=genre_color)
    points = points.properties(width=600, height=600, selection=hover)

    # Only the hovered point gets its title drawn.
    title_text = alt.condition(hover, 'title', alt.value(''))
    labels = alt.Chart().mark_text(align='left', dx=5, dy=-5).encode(
        x=x, y=y, text=title_text)

    scatter = alt.layer(points, labels)
    return altair_viewer.show(alt.hconcat(scatter, genre_chart, data=data))
Exemple #8
0
def av(chart):
    """Print the chart's JSON specification, then display it via ``altview``.

    Args:
      chart: Object exposing ``to_json()`` that ``altview.show`` can display.
    """
    spec_json = chart.to_json()
    print(spec_json)
    altview.show(chart)
    """
    base = alt.Chart().mark_bar().encode(x=alt.X(field, bin=alt.Bin(maxbins=10), title=label),
                                         y="count()", ).properties(width=300, )
    return alt.layer(
        base.transform_filter(filter),
        base.encode(color=alt.value('lightgray'), opacity=alt.value(.7)), ).resolve_scale(y='independent')


# Per-user rating count and mean, joined with the `users` table on user_id.
# NOTE(review): `flatten_cols()` is not a pandas built-in; presumably it
# flattens the aggregated column index into names such as 'rating count' and
# 'rating mean' (the names used below) -- confirm against its definition.
users_ratings = (
    ratings.groupby('user_id', as_index=False).agg({'rating': ['count', 'mean']}).flatten_cols().merge(users,
                                                                                                       on='user_id'))

# Show two histograms (ratings per user, mean rating per user) next to the
# occupation chart; all three share `users_ratings` as their data source.
altair_viewer.show(
    alt.hconcat(filtered_hist('rating count', '# ratings / user', occupation_filter),
                filtered_hist('rating mean', 'mean user rating', occupation_filter), occupation_chart,
                data=users_ratings))

# Exploring the MovieLens Data (Movies)
# Per-movie rating count and mean, joined onto `movies` on movie_id.
movies_ratings = movies.merge(
    ratings.groupby('movie_id', as_index=False).agg({'rating': ['count', 'mean']}).flatten_cols(), on='movie_id')

# Multi-selection on the 'genre' field, plus a bar chart of record counts per
# genre: selected genres are coloured by genre, the others light gray.
genre_filter = alt.selection_multi(fields=['genre'])
genre_chart = alt.Chart().mark_bar().encode(x="count()", y=alt.Y('genre'), color=alt.condition(
    genre_filter, alt.Color("genre:N"), alt.value('lightgray'))).properties(height=300, selection=genre_filter)

# Ten movies with the most ratings. The result is not assigned or printed, so
# it is only visible when evaluated interactively (e.g. in a notebook cell).
(movies_ratings[['title', 'rating count', 'rating mean']]
 .sort_values('rating count', ascending=False)
 .head(10))

(movies_ratings[['title', 'rating count', 'rating mean']]
Exemple #10
0
    cornerRadiusTopLeft=3, cornerRadiusTopRight=3).encode(
        x='category:N',
        y=alt.Y('sum(positive_comments):Q',
                title='Sum of Positive comments',
                axis=alt.Axis(labelOverlap=True)),
        color='category:N').properties(
            title='Distribution of Positive Comments', width=300)

# Bar chart of summed `negative_comments` per category, coloured by category,
# with rounded top corners on the bars.
chart2 = alt.Chart(df).mark_bar(
    cornerRadiusTopLeft=3, cornerRadiusTopRight=3).encode(
        x='category:N',
        y=alt.Y('sum(negative_comments):Q',
                title='Sum of Negative comments',
                axis=alt.Axis(labelOverlap=True)),
        color='category:N').properties(
            title='Distribution of Negative Comments', width=300)

# Same layout for the summed `neutral_comments` per category.
chart3 = alt.Chart(df).mark_bar(
    cornerRadiusTopLeft=3, cornerRadiusTopRight=3).encode(
        x='category:N',
        y=alt.Y('sum(neutral_comments):Q',
                title='Sum of Neutral comments',
                axis=alt.Axis(labelOverlap=True)),
        color='category:N').properties(
            title='Distribution of Neutral Comments', width=300)

# Place the three charts side by side on a shared y scale so bar heights are
# comparable across panels; `chart1` is expected to be defined earlier in the
# script. Title styling is applied once on the combined chart.
concat = alt.hconcat(chart1, chart2,
                     chart3).resolve_scale(y='shared').configure_title(
                         fontSize=15, offset=5, orient='top', anchor='middle')
# Display the combined chart through the viewer.
viewer.show(concat)
Exemple #11
0
 def open_chart_in_browser(chart: alt.Chart) -> NoReturnType:
     """Display ``chart`` by handing it to ``altview.show``.

     Thin convenience wrapper around altair_viewer's ``altview``.
     """
     # NOTE(review): `NoReturnType` is not defined in this snippet. If it
     # aliases `typing.NoReturn` the annotation is misleading, because this
     # function returns normally (implicitly None) -- confirm.
     altview.show(chart)
Exemple #12
0
def plot_both(county='Alle_Fylker',
              periodicity='day',
              start=None,
              end=None,
              plot_lib='Altair',
              view_plot=False,
              plot_title=False,
              width=800,
              height=500):
    """
    Plot Covid19 cumulative and newly reported cases from FHI/Norway together.

    With ``plot_lib='plt'`` the two plots are drawn on twin y-axes through
    ``plot_cumulative_cases`` and ``plot_reported_cases``. With
    ``plot_lib='Altair'`` the data is read from
    ``data/antall-meldte-covid-19-t_<periodicity>_<county>.csv`` and drawn as
    a layered chart: bars for the new cases and a line for the cumulative
    cases, each with its own y-axis.

    Parameters:
    ~~~~~~~~~~~~~~~~~~~~
    :param county: str: Name of the county to be analyzed, which could
                        be 'Agder', 'Innlandet', 'More_og_Romsdal',
                        'Nordland', 'Oslo', 'Rogaland',
                        'Troms_og_Finnmark', 'Trondelag',
                        'Vestfold_og_Telemark', 'Vestland',
                        'Viken' or 'Alle_Fylker' for all counties.
    :param periodicity: str: Periodicity to display the plot, which could
                             be 'day' or 'week'. Default 'day'. Only used
                             for the 'Altair' plot.
    :param start: str or date object: The starting date for the analysis.
    :param end: str or date object: The ending date for the analysis.
    :param plot_lib: str: The name of the plotting library to be used.
                          Default is 'Altair' which uses Altair. The other
                          option is 'plt' which uses matplotlib.
    :param view_plot: bool: The default is False to not show the plot.
    :param plot_title: bool: The default is False to not display the
                             plot's title.
    :param width: int: Only used for 'Altair' plot. Define the width of
                       the plotting. Default is 800.
    :param height: int: Only used for 'Altair' plot. Define the height of
                       the plotting. Default is 500.
    :return: The Altair chart when ``plot_lib`` is 'Altair', otherwise None.
    :raises ValueError: If ``plot_lib`` is neither 'plt' nor 'Altair', or if
                        ``periodicity`` is neither 'day' nor 'week' (checked
                        for the 'Altair' plot only).

    Notes:
    ~~~~~~~~~~~~~~~~~~~~
    - If start and end are None, then it will plot the entire data-base.
    """
    if plot_lib not in ('plt', 'Altair'):
        raise ValueError(f"Error: Plot type {plot_lib} not implemented.")

    # creating Matplotlib plot
    if plot_lib == 'plt':
        # The sub-plots never show themselves or set a title; both are
        # handled once below, after the two plots share the figure.
        plot_cumulative_cases(county=county,
                              start=start,
                              end=end,
                              plot_lib=plot_lib,
                              view_plot=False,
                              plot_title=False)

        # setting plots together but with independent y-axis
        plt.twinx()

        plot_reported_cases(county=county,
                            start=start,
                            end=end,
                            plot_lib=plot_lib,
                            view_plot=False,
                            plot_title=False)

        if plot_title is True:
            plt.title(f'Data Vs (cumulative and daily cases) ' f'for {county}')

        if view_plot is True:
            plt.show()

        return None

    # --- Altair plot ---
    # Validate before touching the file system so a bad argument fails fast.
    if periodicity not in ('day', 'week'):
        raise ValueError("Error: The given periodicity is not valid.")
    daily = periodicity == 'day'

    # column names in the data-files and their axis/tooltip titles
    col1 = 'Kumulativt antall'
    title1 = 'Cumulative nrs.'

    col2 = 'Nye tilfeller'
    title2 = 'New report nrs.'

    # building the data-file name from periodicity and county
    period_tag = 'day' if daily else 'week'
    data_file = ('data/antall-meldte-covid-19-t_' + period_tag + '_'
                 + county + '.csv')

    # reading data as DataFrame. `infer_datetime_format` is deliberately not
    # passed: it is deprecated in pandas 2.x and has no effect unless
    # `parse_dates` is enabled.
    df = pd.read_csv(filepath_or_buffer=data_file,
                     sep=',',
                     index_col='Dato')

    # setting data range (label slice on the 'Dato' index)
    df = df[start:end]

    # Altair needs 'Dato' as a regular column
    df = df.reset_index()

    if daily:
        df['Dato'] = pd.to_datetime(df["Dato"], dayfirst=True)

        # creating base for Altair plot as a function of daily-dates
        base = alt.Chart(df).encode(
            alt.X('Dato:T',
                  axis=alt.Axis(format='%Y-%m-%d'),
                  scale=alt.Scale(zero=False)))
    else:
        # creating base for Altair plot as a function of weekly-dates
        base = alt.Chart(df).encode(
            alt.X('Dato', scale=alt.Scale(zero=False)))

    # drawing bars (new cases, axis on the left)
    bar = base.mark_bar(opacity=0.3).encode(
        y=alt.Y(col2, axis=alt.Axis(title=title2, orient='left')))

    # drawing line (cumulative cases, axis on the right)
    line = base.mark_line(color='red').encode(
        y=alt.Y(col1, axis=alt.Axis(title=title1, orient='right')))

    # creating x-axis selections
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['Dato'],
                            empty='none')

    # Transparent selectors across the chart. This is what tells us
    # the x-value of the cursor
    selectors = alt.Chart(df).mark_point().encode(
        x='Dato',
        opacity=alt.value(0),
    ).add_selection(nearest)

    # drawing points on the line, and highlight based on selection
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))

    # drawing a rule at the location of the selection
    rules = alt.Chart(df).mark_rule(color='gray').encode(
        x='Dato').transform_filter(nearest)

    # setting dual-axis
    chart = alt.layer(line, bar, selectors, points,
                      rules).resolve_scale(y='independent')

    # setting properties and interactiveness; the title is only added on
    # request, as documented and as the matplotlib branch already does
    properties = {'width': width, 'height': height}
    if plot_title is True:
        properties['title'] = f'Covid19 reports for {county}'
    chart = chart.properties(**properties).interactive()

    # adding tooltips
    chart = chart.encode(tooltip=[
        alt.Tooltip('Dato', title='Date'),
        alt.Tooltip(col1, title=title1),
        alt.Tooltip(col2, title=title2)
    ])

    # viewing the plot
    if view_plot is True:
        altair_viewer.show(chart)

    return chart