Ejemplo n.º 1
0
def make_plot(year):
    """Build a bar chart of mean US and international gross per release year.

    Args:
        year: Not used by the body; kept so existing callers keep working.

    Returns:
        An Altair bar chart with the release year on x (ordinal), the mean of
        the ``dollars`` column on y, and colour keyed by the ``type`` column
        (``International_Gross`` or ``US_Gross``).
    """
    df = data.movies()

    # Parse the release dates and derive a year column for grouping.
    df["Release_Date"] = pd.to_datetime(df["Release_Date"])
    df["year"] = df["Release_Date"].dt.year

    # Keep only rows with year < 2010: per the original note, later years are
    # either mislabeled or the sample size is too small.
    # .copy() detaches the filtered frame so the column assignment below does
    # not trigger pandas' SettingWithCopyWarning.
    df = df.query("year < 2010").copy()

    df["International_Gross"] = df["Worldwide_Gross"] - df["US_Gross"]

    # Reshape to long form: one row per (year, gross type, dollars).
    df_boxoffice = df[["International_Gross", "year", "US_Gross"]].melt(
        id_vars="year",
        var_name="type",
        value_name="dollars",
    )

    # NOTE(review): the title mentions a production-budget overlay, but no
    # budget series is encoded in this chart — confirm the intended title.
    bo_chart = alt.Chart(df_boxoffice).mark_bar().encode(
        alt.Y('dollars', aggregate="mean", axis=alt.Axis(title='Dollars')),
        alt.X('year:O', axis=alt.Axis(title='Year')),
        color="type"
    ).properties(
        title=
        "Average box office of US movies over time overlayed with production budget"
    )

    return bo_chart
Ejemplo n.º 2
0
"""
Trellis Scatter Plot
-----------------------
This example shows how to make a trellis scatter plot.
"""

import altair as alt
from vega_datasets import data

# Movies dataset from vega_datasets.
source = data.movies()

# Point chart of US_DVD_Sales against Worldwide_Gross, split into one column
# per MPAA_Rating value.
chart = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x='Worldwide_Gross',
        y='US_DVD_Sales',
        column='MPAA_Rating',
    )
)
        df.groupby('sex').tip.mean().values,
        df.groupby(['sex', 'time']).tip.mean().values),
    marker=dict(colors=px.colors.sequential.Emrld)),
                layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)',
                                 plot_bgcolor='rgba(0,0,0,0)'))

# Remove the outer margins and set the figure title, then render the figure.
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0),
                  title_text='Tipping Habits Per Gender, Time and Day')
fig.show()

#_____________________________________________________________________
# 3. Parallel Categories
import plotly.express as px
from vega_datasets import data
import pandas as pd
# Load the movies dataset and discard every row that has a missing value.
df = data.movies().dropna()

# Integer code per distinct Major_Genre value, used below as the colour value.
df['Genre_id'] = pd.factorize(df['Major_Genre'])[0]

# Parallel-categories diagram across rating, creative type and genre.
fig = px.parallel_categories(
    df,
    dimensions=['MPAA_Rating', 'Creative_Type', 'Major_Genre'],
    color="Genre_id",
    color_continuous_scale=px.colors.sequential.Emrld,
)
fig.show()

#_____________________________________________________________________
# 4. Parallel Coordinates

import plotly.express as px
from vega_datasets import data
-----------------------------------------------
This is a case study of how the counts, ratings, gross and budget of super hero movies vary over time.

The Movies dataset contains information about super hero movies from 1978 to 2010. Before 2002, the count of super hero
films was always lower than 2, but since 2002, super hero films started to become popular as the count went up to 6 per year.
Also, there are more movies with higher ratings on both websites since 2002. The highest rated movies on the two websites, The Dark
Knight and The Incredibles, were both released after 2002.
In addition, the worldwide gross and production budget have also tended to grow since 2002.

"""

import pandas as pd
import altair as alt
from vega_datasets import data

# Keep only the super hero movies. .copy() detaches the filtered rows from the
# full movies frame so the column assignment below does not trigger pandas'
# SettingWithCopyWarning.
sh_movies_df = data.movies().query('Creative_Type == "Super Hero"').copy()

# Index the frame by parsed release date and expose the release year.
sh_movies_df['Date'] = pd.to_datetime(sh_movies_df['Release_Date'])
sh_movies_df = sh_movies_df.set_index('Date')
sh_movies_df['Year'] = sh_movies_df.index.year

# Divide the two ratings by 100 and 10 respectively
# (presumably to put both on a common 0-1 scale — confirm against the data).
sh_movies_df[
    'Rotten_Tomatoes_Rating'] = sh_movies_df['Rotten_Tomatoes_Rating'] / 100
sh_movies_df['IMDB_Rating'] = sh_movies_df['IMDB_Rating'] / 10

# Interval selection restricted to the x encoding.
brush = alt.selection_interval(encodings=['x'])

years = alt.Chart(sh_movies_df).mark_bar().add_selection(brush).encode(
    alt.X('Year:O', title='Release Year', axis=alt.Axis(labelAngle=45)),
    alt.Y('count():Q',
          title='Counts')).properties(width=650,
                                      height=80,
Ejemplo n.º 5
0
def test_histogram():
    """Return the ``ar.histogram`` result for the movies ``IMDB_Rating`` column."""
    movies = data.movies()
    return ar.histogram(movies, column="IMDB_Rating")