예제 #1
0
    df.columns = ["Index", "EI", "id", "x", "y"]
    x = df['x'].values
    y = df['y'].values
    EIvals = df['EI'].values
    psource = ColumnDataSource(data=dict(
        x=x,
        y=y,
        EIvals=EIvals,
    ))
    return psource


# Module-level plot inputs: the data source returned by read_data().
source = read_data()
# The colour scale is pinned to [0, 1]; the data-driven bounds below are
# disabled and kept only for reference.
# low=math.floor(np.min(df['EI'].values))
# high=math.ceil(np.max(df['EI'].values))
mapper = LinearColorMapper(palette=Viridis256, low=0, high=1)
# Colour bar driven by the same mapper, anchored at offset (0, 0).
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))

# Flask application object for this module.
app = Flask(__name__)

posts_data = [
    {
        'bug': 'Red Mite',
        'predator': 'Birdy Bird',
        'crops_affected': "really can't say",
        'ei_val': '-999999'
    }  #,
    # {
    #     'author': 'Jane Doe',
    #     'title': 'Blog Post 2',
    #     'content': 'Second post content',
예제 #2
0
def parallel_plot(df, color=None, palette=None):
    """Create a parallel-coordinates plot from a dataframe.

    Every row of ``df`` is drawn as one poly-line that crosses one vertical
    axis per column. Columns are min-max normalised to [0, 1] for drawing,
    while each column gets its own extra axis labelled in original units.

    Args:
        df: Dataframe whose columns are the dimensions to plot. Column
            values must support ``min``/``max`` and arithmetic.
        color: Optional array with one value per row; it is passed through
            a linear colour mapper to colour *selected* lines. Defaults to
            an array of ones.
        palette: Optional list of colours for the mapper. Defaults to a
            single red.

    Returns:
        The Bokeh figure, with the parallel selection tool set as the
        active drag tool.
    """
    npts = df.shape[0]
    ndims = len(df.columns)

    if color is None:
        color = np.ones(npts)
    if palette is None:
        palette = ['#ff0000']

    # ``low`` is the bottom of the mapped range and ``high`` the top; the
    # bounds used to be passed the other way round, which inverted the
    # palette.
    cmap = LinearColorMapper(low=color.min(),
                             high=color.max(),
                             palette=palette)

    # Min-max normalise every column. A constant column (or a single-row
    # frame) has zero span, which would turn its coordinates into NaN, so
    # such spans are replaced by 1 and the column is drawn at 0.
    col_min = df.min()
    col_span = (df.max() - col_min).replace(0, 1)

    data_source = ColumnDataSource(
        dict(xs=np.arange(ndims)[None, :].repeat(npts, axis=0).tolist(),
             ys=np.array((df - col_min) / col_span).tolist(),
             color=color))

    p = figure(x_range=(-1, ndims),
               y_range=(0, 1),
               width=1000,
               tools="pan, box_zoom")

    # Create x axis ticks from columns contained in dataframe
    fixed_x_ticks = FixedTicker(ticks=np.arange(ndims), minor_ticks=[])
    formatter_x_ticks = FuncTickFormatter(code="return columns[index]",
                                          args={"columns": df.columns})
    p.xaxis.ticker = fixed_x_ticks
    p.xaxis.formatter = formatter_x_ticks

    p.yaxis.visible = False
    p.y_range.start = 0
    p.y_range.end = 1
    p.y_range.bounds = (-0.1, 1.1)  # add a little padding around y axis
    p.xgrid.visible = False
    p.ygrid.visible = False

    # Create extra y axis for each dataframe column
    tickformatter = BasicTickFormatter(precision=1)
    for index, col in enumerate(df.columns):
        start = df[col].min()
        end = df[col].max()
        # Extend the column's own range by the same relative padding that
        # was applied to the normalised y range above.
        bound_min = start + abs(end - start) * (p.y_range.bounds[0] -
                                                p.y_range.start)
        bound_max = end + abs(end - start) * (p.y_range.bounds[1] -
                                              p.y_range.end)
        p.extra_y_ranges.update({
            col:
            Range1d(start=bound_min,
                    end=bound_max,
                    bounds=(bound_min, bound_max))
        })

        fixedticks = FixedTicker(ticks=np.linspace(start, end, 8),
                                 minor_ticks=[])

        # One axis per column, pinned at that column's x position.
        p.add_layout(
            LinearAxis(fixed_location=index,
                       y_range_name=col,
                       ticker=fixedticks,
                       formatter=tickformatter), 'right')

    # create the data renderer ( MultiLine )
    # specify selected and non selected style
    non_selected_line_style = dict(line_color='grey',
                                   line_width=0.1,
                                   line_alpha=0.5)

    selected_line_style = dict(line_color={
        'field': 'color',
        'transform': cmap
    },
                               line_width=1)

    parallel_renderer = p.multi_line(xs="xs",
                                     ys="ys",
                                     source=data_source,
                                     **non_selected_line_style)

    # Specify selection style
    selected_lines = MultiLine(**selected_line_style)

    # Specify non selection style
    nonselected_lines = MultiLine(**non_selected_line_style)

    parallel_renderer.selection_glyph = selected_lines
    parallel_renderer.nonselection_glyph = nonselected_lines
    # Now that the extra ranges are computed, show the padded y range.
    p.y_range.start = p.y_range.bounds[0]
    p.y_range.end = p.y_range.bounds[1]

    # Starts empty; the selection renderer below draws from it.
    rect_source = ColumnDataSource({
        'x': [],
        'y': [],
        'width': [],
        'height': []
    })

    # add rectangle selections
    selection_renderer = p.rect(x='x',
                                y='y',
                                width='width',
                                height='height',
                                source=rect_source,
                                fill_alpha=0.7,
                                fill_color='#009933')
    selection_tool = ParallelSelectionTool(renderer_select=selection_renderer,
                                           renderer_data=parallel_renderer,
                                           box_width=10)
    # custom resets (reset only axes not selections)
    reset_axes = ParallelResetTool()

    # add tools and activate selection ones
    p.add_tools(selection_tool, reset_axes)
    p.toolbar.active_drag = selection_tool
    return p
예제 #3
0
    count = 0
    for i in range(56, 70):
        elements.period[i] = 'La'
        elements.group[i] = str(count + 4)
        count += 1

    count = 0
    for i in range(88, 102):
        elements.period[i] = 'Ac'
        elements.group[i] = str(count + 4)
        count += 1

# Define the matplotlib and bokeh colour maps over the range of ``data``.
# log_scale == 0 selects a linear scale, log_scale == 1 a logarithmic one.
if log_scale == 0:
    color_mapper = LinearColorMapper(palette=bokeh_palette,
                                     low=min(data),
                                     high=max(data))
    norm = Normalize(vmin=min(data), vmax=max(data))
elif log_scale == 1:
    for datum in data:
        if datum < 0:
            # str() is required here: concatenating the number directly
            # raised a TypeError instead of the intended ValueError.
            raise ValueError('Entry for element ' + str(datum) +
                             ' is negative but log-scale is selected')
    color_mapper = LogColorMapper(palette=bokeh_palette,
                                  low=min(data),
                                  high=max(data))
    norm = LogNorm(vmin=min(data), vmax=max(data))
else:
    # Without this branch ``norm`` stays unbound and the next statement
    # fails with an unrelated NameError.
    raise ValueError('log_scale must be 0 or 1, got ' + repr(log_scale))
color_scale = ScalarMappable(norm=norm, cmap=cmap).to_rgba(data, alpha=None)

# Define color for blank entries
blank_color = '#c4c4c4'
예제 #4
0
def app():
    """Render the "Analysis on Movies from 2011 - 2021" Streamlit page.

    Works on copies of the module-level dataframes ``df_movies_orig``,
    ``title_basics_df_orig`` and ``imdb_info_withbudget_orig`` and draws six
    sections in order: an opening-by-year/month heat map (Bokeh), a per-year
    ranking chart, a genre word cloud, a runtime-per-genre chart, a
    per-genre performance chart and a budget-per-genre box plot.

    Returns nothing; all output goes to the Streamlit page.
    """
    st.title('Analysis on Movies from 2011 - 2021')
    # NOTE(review): the markup below closes a </strong> that is not open at
    # that point; the string is deliberately left unchanged.
    st.markdown(
        "<p><strong>Disclaimer: </strong><em> This web application was created by Dustin Reyes. </strong></em>",
        unsafe_allow_html=True)
    st.markdown(
        """<p align="justify"><em>This page incorporates analysis on movies from 2011 to 2021 and dashboards to visualize the insights that were observed. 
                It must be noted that movies with complete information, released in theaters and with reliable sources are only 
                considered for this analysis.</em>""",
        unsafe_allow_html=True)

    # Work on copies so the module-level originals are never mutated.
    df_movies = df_movies_orig.copy()
    title_basics_df = title_basics_df_orig.copy()
    imdb_info_withbudget = imdb_info_withbudget_orig.copy()

    df_movies.dropna(subset=['worldwide_gross', 'metacritic_score'],
                     inplace=True)
    # A single index reset after sorting is sufficient.
    df_movies.sort_values(by='release', inplace=True)
    df_movies.reset_index(drop=True, inplace=True)
    # Human-readable column names; they double as the ranking categories.
    df_movies.rename(
        {
            'worldwide_gross': 'Worldwide Gross',
            'metacritic_score': 'Metacritic Score',
            'budget': 'Budget',
            'opening': 'Opening',
            'gross': 'Gross',
            'runtimeMinutes': 'Runtime (Minutes)',
            'averageRating': 'Average Rating',
            'numVotes': 'Number of Votes'
        },
        axis=1,
        inplace=True)

    st.markdown(
        """<p align="justify"> A commercially successful movie not only provides entertainment to the audience but also enables film producers to generate significant profits. 
    Several factors such as veteran actors, social media presence, popularity, and release time are important for profitability, 
    but they do not always guarantee how a movie will have a great reception to the audience. 
    In this page, we sought to understand temporal patterns affecting movie opening performance, 
    see how popular genres change over years, see movie rankings based on chosen metrics, observe movie runtimes across different genres and observe changes in movie ratings and vote averages over time""",
        unsafe_allow_html=True)

    # ---- I. Heat map of mean opening per (year, month) ----
    st.markdown("<h2> I. Temporal Pattern of Movie Openings",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section aims to analyze the months wherein movies have the best opening performance. 
                        The analysis of temporal patterns across the years enables film makers to strategically release films on months wherein such movies are in demand""",
        unsafe_allow_html=True)

    df_movies['month'] = pd.DatetimeIndex(df_movies['release']).month
    opening_by_month_year = df_movies.groupby(["startYear", "month"
                                               ]).Opening.mean().reset_index()
    newdata = ColumnDataSource(opening_by_month_year)

    mapper = LinearColorMapper(palette=bokeh.palettes.RdBu[9],
                               low=opening_by_month_year["Opening"].min(),
                               high=opening_by_month_year["Opening"].max())

    hover = HoverTool(tooltips=[
        ("Opening", "@Opening{$,}"),
    ])

    TOOLS = [hover, "save,pan,box_zoom,reset,wheel_zoom"]

    p = figure(x_axis_label='Year',
               y_axis_label='Month',
               tools=TOOLS,
               plot_width=900)

    # One unit square per (year, month) cell, coloured by the mean opening.
    p.rect(x="startYear",
           y="month",
           width=1,
           height=1,
           source=newdata,
           fill_color={
               'field': 'Opening',
               'transform': mapper
           })

    color_bar = ColorBar(color_mapper=mapper,
                         location=(20, 0),
                         label_standoff=18,
                         ticker=AdaptiveTicker(),
                         formatter=NumeralTickFormatter(format="$,"))

    p.add_layout(color_bar, 'right')

    p.title.text = "Movie Opening Performance by Year and Month"
    p.title.align = "center"
    p.title.text_font_size = "20px"
    st.write(p)

    # ---- II. Per-year rankings by a user-selected metric ----
    st.markdown("<h2> II. Movie Ranking Analysis", unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the rankings of movies per year based on the following criterias: 
    <strong>Budget, Opening, Gross, Worldwide Gross, Metacritic Score, Runtime (Minutes), Average Rating, and Number of Votes</strong>. This section enables
    analysts to know what are the qualities and characteristics that movies that have appeared on these rankings have. """,
        unsafe_allow_html=True)
    categories = [
        'Budget', 'Opening', 'Gross', 'Worldwide Gross', 'Metacritic Score',
        'Runtime (Minutes)', 'Average Rating', 'Number of Votes'
    ]
    # list() keeps the same element objects the former append loop produced.
    years = list(df_movies['startYear'].unique())

    option1 = st.selectbox('Pls select the category', categories)

    option2 = st.selectbox('Pls select the year', years)

    figure1 = movie_analyzer(df_movies, category=option1, year=option2)
    st.plotly_chart(figure1)

    # ---- III. Genre word cloud ----
    st.markdown("<h2> III. What are the Most Popular Movie Genres?",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the most popular genres as a WordCloud. The larger the font, the more frequently appearing
                            the word is. From the WordCloud, we can observe that Action Movies were the most popular movie genres among
                            film makers during the last 10 years.""",
        unsafe_allow_html=True)
    colors = ["#BF0A30", "#002868"]
    cmap = LinearSegmentedColormap.from_list("mycmap", colors)

    # Join every movie's genre string into one text for the word cloud.
    long_string = ' '.join(df_movies['genres'].values.tolist())

    # Create a WordCloud object
    wordcloud = WordCloud(background_color="white",
                          colormap=cmap,
                          width=1000,
                          height=300,
                          max_font_size=500,
                          relative_scaling=0.3,
                          min_font_size=5)

    # Generate a word cloud
    wordcloud = wordcloud.generate(long_string)

    # Visualize the word cloud. Only the figure that is actually rendered
    # is created; a stray 100x100-inch figure used to be opened here and
    # leaked on every rerun.
    fig_cld, axes_cld = plt.subplots(1, 1)
    axes_cld.imshow(wordcloud, interpolation="bilinear")
    axes_cld.axis("off")
    st.pyplot(fig_cld)
    # Release the figure so reruns do not accumulate open figures.
    plt.close(fig_cld)

    # ---- IV. Runtimes per genre ----
    st.markdown("<h2> IV. Movie Runtimes per Genre Analysis",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes movie runtimes per genre. It is important that we identify the characterictics
                    of movies whose runtimes are not normal as these may or may not affect viewership of the said movie. It is also quite possible that
                    these films are experimental in nature and that the director mainly created the movie for test subjects.""",
        unsafe_allow_html=True)
    genres = title_basics_df['genres'].unique().tolist()
    genres.append('All')

    option3 = st.slider('Pls. choose the number of movies to consider?', 2, 20,
                        10)
    option4 = st.selectbox('Pls select the genre', genres)

    figure2 = runtimemovie_analyzer(title_basics_df,
                                    number=option3,
                                    genre=option4)
    st.plotly_chart(figure2)

    # ---- V. Genre performance across the years ----
    st.markdown("<h2> V. Performance for each Genre Across the Years",
                unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section aims to visualize the different performance of each genre based on metrics (opening, gross and worldwide gross) across the years 2011 to 2021.
                      """,
        unsafe_allow_html=True)
    categories = ['Opening', 'Gross', 'Worldwide Gross']
    option5 = st.selectbox('Pls select the category', categories)
    figure3 = genre_opening_analyzer(df_movies, category=option5)
    st.plotly_chart(figure3)

    # ---- VI. Budget distribution per genre ----
    st.markdown("<h2> VI. Average Budget per Genre", unsafe_allow_html=True)
    st.markdown(
        """<p align="justify">This section visualizes the average budget per genre across the available data. From the visualization, we
                        can observe that the Action genre has average budgets that were considered as outliers through all the average budgets across genres.
                        Meanwhile, other genres usually have lower budget allocations when being made and such genres include horror, drama, documentaries, comedies.
                      """,
        unsafe_allow_html=True)
    fig = plt.figure(figsize=(15, 10))

    # fliersize is the size of outlier markers
    g = sns.boxplot(x='genres',
                    y='budget',
                    data=imdb_info_withbudget,
                    palette="Set2",
                    linewidth=1,
                    fliersize=1.5)

    g.set(title='Average Budget per Genre',
          ylabel="Average Budget ($M)",
          xlabel="")

    # put a horizontal line on overall mean
    plt.axhline(imdb_info_withbudget.budget.mean(),
                ls='--',
                lw=1,
                color='black')

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    st.pyplot(fig)
    # Release the figure so reruns do not accumulate open figures.
    plt.close(fig)
def bokeh_map():
    """Build a choropleth map with a time slider and add it to the document.

    The map is made for disease-incidence visualisation. For other uses
    (elections, sales, ...) change the ``high`` value of the colour mapper
    and provide ``shp_data`` in the following shape:

    index   Municipality  2004-01   2004-02   2004-3  ...  Geometry
      0      Eindhoven       2         5        12         POLYGON()
      1        Breda         8        14         9         POLYGON()
      2 's Hertogenbosch     0         0        19         POLYGON()

    The time slider steps through the period columns (2004-01, 2004-02,
    etc.), i.e. every column except 'Municipality' and 'geometry'.

    The disease name is read from standard input. Nothing is returned; the
    layout (figure + slider) is added to the current Bokeh document.
    """

    disease = input('What disease would you like to Visualize? ')

    # Data Preprocessing
    all_df = disease_studied()
    data = MonthlyTransform(all_df)
    data.find_mun()
    longi, lati = data.center_of_mass()

    # The find_mun method needs to be reused after the center_of_mass method
    data.find_mun()
    data = data.monthly_municipality()
    shp_data = make_df_shapefile(data)
    column_list = shp_data.columns.tolist()
    column_list = [
        col for col in column_list if col not in ('Municipality', 'geometry')
    ]

    # Main Body
    # The upper colour bound is lowered to make the map better, since the
    # max values only correspond to just a few municipalities.
    if disease == 'Kinkhoest':
        high = shp_data['2012-04'].max()
    else:
        high = shp_data[column_list].max().max() - 3

    TOOLS = "pan,wheel_zoom,reset,hover,save"

    # This will be the default column to start the visualization
    color_column = '2010-01'

    geojson = shp_data.to_json()

    def slider_title(n):
        """Return the slider caption for the period column at index ``n``."""
        return 'Number of Incidences in ' + column_list[n]

    # Initial Column of the Time Slider
    N = 0

    mapper = LinearColorMapper(palette=Magma256[::-1], low=0, high=high)

    geo_source = GeoJSONDataSource(geojson=geojson)
    p = figure(tools=TOOLS,
               toolbar_sticky=False,
               plot_width=600,
               plot_height=285)
    patches_renderer = p.patches('xs',
                                 'ys',
                                 fill_alpha=0.7,
                                 fill_color={
                                     'field': color_column,
                                     'transform': mapper
                                 },
                                 line_color='black',
                                 line_width=0.2,
                                 source=geo_source)

    center_of_mass = p.circle(
        x=longi[N],
        y=lati[N],
        color="#1a5921",
    )

    # This is to remove the axis of the figure to improve its appearance.
    p.xaxis.visible = False
    p.yaxis.visible = False

    # This is to remove the grid lines of the map to improve its appearance.
    p.xgrid.visible = False
    p.ygrid.visible = False

    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Municipality", "@Municipality"),
        ("Incidences", "@{" + color_column + "}"),
    ]

    color_bar = ColorBar(color_mapper=mapper,
                         label_standoff=12,
                         border_line_color=None,
                         location=(0, 0))
    p.add_layout(color_bar, 'right')

    # The slider end is kept beyond the last column on purpose (the author
    # reported buggy behaviour otherwise); the callback clamps the position.
    slider = Slider(start=0,
                    end=len(column_list) + 2,
                    value=N,
                    step=1,
                    width=515,
                    show_value=False,
                    tooltips=False,
                    title='Number of Incidences in ' + column_list[N])

    def callback(attr, old, new):
        """Switch the map, marker and tooltip to the slider's new column."""
        # The slider can move past the last period column; clamp so those
        # positions show the last column instead of raising IndexError.
        idx = min(new, len(column_list) - 1)
        selected_column = column_list[idx]
        slider.title = slider_title(idx)
        g = patches_renderer.glyph
        g.fill_color = {**g.fill_color, 'field': selected_column}
        k = center_of_mass.glyph
        k.x = longi[idx]
        k.y = lati[idx]
        hover = p.select_one(HoverTool)
        hover.point_policy = "follow_mouse"
        hover.tooltips = [
            ("Municipality", "@Municipality"),
            ("Incidences", "@{" + selected_column + "}"),
        ]

    slider.on_change('value', callback)

    layout = column(p, slider)
    curdoc().add_root(layout)
예제 #6
0
# NOTE(review): presumably prepares the per-million death figures used below;
# the function is defined outside this view - confirm.
normalizedTotalDeathsPerMillion()

# Left join: every geometry row is kept, even without matching COVID data.
geoDataFrame = geoDataFrame.merge(covidDataFrame,
                                  left_on='country_code',
                                  right_on='iso_code',
                                  how='left')
# convert date column to string as datetime dtype cannot be converted as JSON
geoDataFrame['date'] = geoDataFrame['date'].astype(str)
# Round-trip through a dict (jsonGeoData) before handing GeoJSON text to Bokeh.
jsonGeoData = json.loads(geoDataFrame.to_json())
source = GeoJSONDataSource(geojson=json.dumps(jsonGeoData))

# sequential multi-hue color palette.
palette = brewer['YlGnBu'][8]
# reverse color order so that dark blue is the highest value.
palette = palette[::-1]
# Values are mapped linearly onto the 8 colours over the fixed range [0, 8].
color_mapper = LinearColorMapper(palette=palette, low=0, high=8)
# define custom tick labels for color bar (currently disabled).
# tick_labels = {'0': '0%', '5': '5%', '10': '10%', '15': '15%',
#                '20': '20%', '25': '25%', '30': '30%', '35': '35%', '40': '>40%'}
# Create color bar.
color_bar = ColorBar(color_mapper=color_mapper,
                     label_standoff=8,
                     width=500,
                     height=20,
                     border_line_color=None,
                     location=(0, 0),
                     orientation='horizontal')

# Figure without a toolbar.
plot = figure(plot_height=600, plot_width=1000, toolbar_location=None)