Beispiel #1
0
def return_figures():
    """Build the plotly figures for the dashboard.

    Reads the module-level ``df`` and produces three charts: a pie chart of
    message counts per genre, a bar chart of the ten most frequent tokens in
    all messages joined together, and a bar chart of the mean of every column
    from the fifth one onward, sorted from highest to lowest.

    Args:
        None

    Returns:
        list (dict): one ``dict(data=..., layout=...)`` entry per chart
    """
    # Chart 1: how many messages fall into each genre.
    messages_per_genre = df.groupby('genre').count()['message']
    pie_traces = [Pie(
        labels=list(messages_per_genre.index),
        values=messages_per_genre,
    )]
    pie_layout = dict(title='Distribution of Message Genres')

    # Chart 2: the ten most common tokens across the whole corpus.
    corpus = " ".join(df["message"])
    top_tokens = Counter(tokenize(corpus)).most_common(10)
    token_labels = [token for token, _ in top_tokens]
    token_totals = [total for _, total in top_tokens]
    token_traces = [Bar(
        x=token_labels,
        y=token_totals,
    )]
    token_layout = dict(title='Top 10 Most common words in messages',
                        yaxis=dict(title="counts"))

    # Chart 3: mean of each column after the first four, largest first.
    share_per_category = df[df.columns[4:]].mean().sort_values(ascending=False)
    category_traces = [Bar(
        x=list(share_per_category.index),
        y=share_per_category,
    )]
    category_layout = dict(title='Categorie Distribution of Disaster Response',
                           yaxis=dict(title="Proportion"))

    return [
        dict(data=pie_traces, layout=pie_layout),
        dict(data=token_traces, layout=token_layout),
        dict(data=category_traces, layout=category_layout),
    ]
Beispiel #2
0
def index():
    """Render ``master.html`` with three bar charts built from ``df``.

    The charts show the message count per genre, and the five highest and
    five lowest column totals (columns from the sixth onward) expressed as a
    percentage of the number of rows in ``df``.
    """
    # Message count per genre.
    per_genre = df.groupby('genre').count()['message']

    # Column totals as a percent of all rows, sorted ascending.
    # NOTE(review): starting at column 5 is presumably what skips "Related",
    # as the chart title claims -- confirm against the table layout.
    totals = df.iloc[:, 5:].sum(axis=0).sort_values() / (0.01 * df.shape[0])
    most_identified = totals.tail(5)
    least_identified = totals.head(5)

    genre_chart = dict(
        data=[Bar(x=list(per_genre.index), y=per_genre)],
        layout=dict(title='Distribution of Message Genres',
                    xaxis=dict(title='Genre'),
                    yaxis=dict(title='Message Count')))

    top_chart = dict(
        data=[Bar(x=list(most_identified.index),
                  y=list(most_identified.values),
                  marker_color='green')],
        layout=dict(
            title='Five Most-Identified Categories (excludes Related)',
            xaxis=dict(title='Category'),
            yaxis=dict(title='Percent of Messages')))

    bottom_chart = dict(
        data=[Bar(x=list(least_identified.index),
                  y=list(least_identified.values),
                  marker_color='red')],
        layout=dict(title='Five Least-Identified Categories',
                    xaxis=dict(title='Category'),
                    yaxis=dict(title='Percent of Messages')))

    graphs = [genre_chart, top_chart, bottom_chart]

    # One DOM id per chart, plus the JSON payload for the template.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
Beispiel #3
0
    def whatevers_bar_chart(self, whatever: str="total") -> Figure:
        """Build a stacked bar chart of the active, deaths and recovered series.

        For each of the three series the column named
        ``"<Whatever> <Series>"`` (both words capitalized) is read from
        ``self.main_df`` and summed over index level 1.

        Args:
            whatever: Prefix selecting the column family (e.g. ``"total"``).
                When it equals ``"daily"`` the first summed entry is dropped.

        Returns:
            A ``Figure`` with one stacked ``Bar`` trace per series, titled
            with the capitalized plural of ``whatever``.
        """

        columns = ["active", "deaths", "recovered"]
        # NOTE(review): ``dates`` is neither aggregated nor trimmed like
        # ``ys`` below, so x and y may differ in length -- confirm intended.
        dates   = self.main_df.reset_index()["Date"]
        color   = {
            "active":    "#ffb347",  # Pastel Orange
            "deaths":    "#ff6961",  # Pastel Red
            "recovered": "#77dd78",  # Pastel Green
        }

        engine = inflect.engine()

        fig = Figure()

        for column in columns:
            name = "{} {}".format(whatever.capitalize(), column.capitalize())

            # ``Series.sum(level=...)`` was removed in pandas 2.0; grouping on
            # the level is the replacement. ``sort=False`` keeps the groups in
            # first-seen order, which matters for the slice below.
            ys = self.main_df[name].groupby(level=1, sort=False).sum()
            if whatever == "daily":
                ys = ys[1:]

            fig.add_trace(Bar(name=name, x=dates, y=ys, marker={"color": color[column]}))

        fig.update_layout(barmode="stack", title_text=engine.plural(whatever).capitalize())

        fig.update_traces(marker_line_width=0)

        return fig
Beispiel #4
0
    def generate_traces(df):
        """Return one ``Bar`` trace per column of ``df`` for a stacked chart.

        Every trace uses the frame's index as x, the column's values as y and
        the column label as the trace name.
        """
        return [
            Bar(x=list(df.index), y=df.iloc[:, position], name=label)
            for position, label in enumerate(df.columns)
        ]
def generate_message_categories_distribution_bar_chart():
    """
    Create a bar chart of the column totals of ``df_categories``.

    The totals are sorted in descending order, and the x labels are taken
    from that same sorted series so every bar is labelled with the column it
    actually represents.

    Returns:
        Whatever ``generate_graph_with_template`` returns for the trace.
    """
    category_totals = df_categories.sum().sort_values(ascending=False)
    # Labels and values must come from the same sorted series; pairing the
    # unsorted column order with sorted values mislabels the bars.
    data = Bar(x=list(category_totals.index),
               y=list(category_totals))
    title = 'Distribution of Message Categories'
    y_title = 'Count'
    x_title = 'Category'

    return generate_graph_with_template(data, title, y_title, x_title)
def generate_message_genres_bar_chart():
    """
    Create a bar chart of the number of messages per ``genre`` in ``df``.

    Returns:
        Whatever ``generate_graph_with_template`` returns for the trace.
    """
    per_genre = df.groupby('genre').count()['message']
    trace = Bar(x=list(per_genre.index), y=per_genre)
    return generate_graph_with_template(
        trace,
        'Distribution of Message Genres',
        'Count',
        'Genre',
    )
Beispiel #7
0
def index():
    """Render ``master.html`` with two charts built from ``df``.

    The first chart shows the total of every column from the fifth onward,
    largest first. The second is a stacked bar chart with one bar per genre
    (direct, news, social) and one trace per such column.
    """
    # Column totals, sorted descending; the labels come from the same series.
    category_counts = df.iloc[:, 4:].sum().sort_values(ascending=False)
    category_names = list(category_counts.index)

    # One row of column totals per genre.
    genres = ["direct", "news", "social"]
    genre_df = pd.DataFrame(
        [df[df["genre"] == genre].iloc[:, 4:].sum() for genre in genres],
        index=list(genres))

    # One trace per column of the per-genre table.
    stacked_traces = [
        Bar(x=list(genre_df.index), y=genre_df.iloc[:, position], name=label)
        for position, label in enumerate(genre_df.columns)
    ]

    overview_chart = dict(
        data=[Bar(x=category_names, y=category_counts)],
        layout=dict(
            title='Messages per Category',
            yaxis=dict(title="Number of Messages"),
            xaxis=dict(tickangle=45, automargin=True),
        ),
    )
    genre_chart = dict(
        data=stacked_traces,
        layout=dict(
            barmode='stack',
            title='Messages per Genre',
            yaxis=dict(title="Number of Messages"),
        ),
    )
    graphs = [overview_chart, genre_chart]

    # One DOM id per chart, plus the JSON payload for the template.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
Beispiel #8
0
def create_figures():
    """Create the figure dictionaries consumed by plotly.

    Builds three bar charts from the module-level ``df`` and ``df_lang``:
    message counts per genre (ascending), the number of rows with and without
    a value in ``original``, and the first ten ``lang``/``counts`` rows of
    ``df_lang``.

    Returns:
        List of Figures: one ``dict(data=..., layout=...)`` per chart
    """
    # Messages per genre, smallest first.
    per_genre = df.groupby('genre').count()['message'].sort_values()
    genre_chart = dict(
        data=[Bar(x=list(per_genre.index), y=per_genre)],
        layout=dict(title='Genres of Words',
                    xaxis=dict(title='Genre'),
                    yaxis=dict(title='Number of Messages')))

    # Rows without / with a value in ``original``.
    # NOTE(review): rows with a missing ``original`` are labelled
    # 'Not English' here -- confirm the two labels are not swapped.
    without_original = df.original.isna().sum()
    translation_chart = dict(
        data=[Bar(x=['Not English', 'English'],
                  y=[without_original, len(df) - without_original])],
        layout=dict(title='Translated Messages',
                    xaxis=dict(title='Translation'),
                    yaxis=dict(title='Number of Messages')))

    # First ten rows of the language table.
    language_chart = dict(
        data=[Bar(x=df_lang.lang[:10], y=df_lang.counts[:10])],
        layout=dict(title="Messages' Language",
                    xaxis=dict(title='Language'),
                    yaxis=dict(title='Number of Messages')))

    return [genre_chart, translation_chart, language_chart]
Beispiel #9
0
def index():
    """Render ``master.html`` with genre and category distribution charts.

    The category chart covers every column of ``df`` except ``message``,
    ``original``, ``genre`` and ``id``; its labels are the column names with
    underscores turned into spaces and title-cased.
    """
    # Messages per genre.
    per_genre = df.groupby('genre').count()['message']

    # Everything that is not one of the four metadata columns is a category.
    metadata_columns = ["message", "original", "genre", "id"]
    category_columns = df.drop(columns=metadata_columns).columns
    category_labels = category_columns.str.replace("_", " ").str.title()
    category_totals = df[category_columns].sum(axis=0)

    genre_chart = dict(
        data=[Bar(x=list(per_genre.index), y=per_genre)],
        layout=dict(
            title='Distribution of Message Genres',
            yaxis=dict(title="Count"),
            xaxis=dict(title="Genre"),
        ),
    )
    category_chart = dict(
        data=[Bar(x=category_labels, y=category_totals)],
        layout=dict(
            title='Distribution of Message Categories',
            yaxis=dict(title="Count"),
        ),
    )
    graphs = [genre_chart, category_chart]

    # One DOM id per chart, plus the JSON payload for the template.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
Beispiel #10
0
def get_figure_of_graph_bar_plot_number_of_scenes(df, xaxis_range=None, title=None):
    """Build a grouped bar chart of scene counts per period and collection.

    Args:
        df: DataFrame with at least the ``collection``, ``number`` and
            ``year_month`` columns. It is copied, never modified.
        xaxis_range: Range forwarded to ``__get_logical_date_range`` to
            select rows. ``None`` (the default) is treated as an empty list.
        title: Chart title; may be ``None``.

    Returns:
        A ``Figure`` with one ``Bar`` trace per distinct ``collection`` value.
    """
    # A fresh list per call: a mutable default would be shared by all calls.
    if xaxis_range is None:
        xaxis_range = []

    figure_height = 800
    df_copy = df.copy()

    logging.info('get_figure_of_graph_bar_plot_number_of_scenes - df_copy.head(): \n%s\n',
                 df_copy.head())
    logging.info('get_figure_of_graph_bar_plot_number_of_scenes - xaxis_range: %s\n',
                 xaxis_range)

    # Row mask, combined with the per-collection mask below.
    logical_date_range = __get_logical_date_range(df_copy, xaxis_range)

    # Build the `data` parameter of `Figure`: one bar trace per collection.
    data = []
    for dataset in df_copy['collection'].unique():
        sub_df = df_copy[(df_copy['collection'] == dataset) & logical_date_range]

        hovertext = 'Number of Scenes: ' + sub_df['number'].map(str) + '<br>' + \
                    'Period: ' + sub_df['year_month'].map(str) + '<br>' + \
                    'Dataset: ' + sub_df['collection'].map(str)

        data.append(Bar({
            'x': sub_df['year_month'],
            'y': sub_df['number'],
            'name': dataset,
            'text': sub_df['number'],  # text inside the bar
            'textposition': 'auto',
            'hovertext': hovertext,
        }))

    fig = Figure({
        'data': data,
        'layout': {
            'title': title,
            'xaxis': {'title': 'Period'},
            'yaxis': {'title': 'Number of scenes'},
            'plot_bgcolor': colors['background'],
            'paper_bgcolor': colors['background'],
            'font': {
                'color': colors['text']
            }
        }
    })

    fig.update_layout(
        barmode='group',
        height=figure_height,
        xaxis_tickangle=-45
    )

    return fig
Beispiel #11
0
def horizontal(dataframe, partei, c_map):
    """Build a slim overlay bar comparing a party's total seats with the
    number of its rows in ``dataframe``.

    Args:
        dataframe: Table with a ``partei`` column, one row per member.
        partei: Key into the seat table below; ``"bundestag"`` counts every
            row of ``dataframe`` instead of filtering by party.
        c_map: Mapping from party key to bar color.

    Returns:
        A 100-pixel-high figure with hidden axes and two overlaid bars.
    """
    # Seat counts per party; "bundestag" is the whole parliament.
    sitze = {
        "bundestag": 709,
        "spd": 152,
        "cdu": 200,
        "fdp": 80,
        "afd": 88,
        "linke": 69,
        "gruene": 67,
        "csu": 46,
    }
    farbe = c_map[partei]
    gesamt = sitze[partei]

    treffer = sum(dataframe.partei == partei)
    if partei == "bundestag":
        treffer = len(dataframe)
    anteil = int(treffer / gesamt * 100)

    fig = px.bar(color=[farbe], opacity=0.5)

    # Background bar: all seats of the party, half transparent.
    gesamt_bar = Bar(
        x=[gesamt],
        marker=dict(color=farbe, opacity=0.5, line_color="black"),
        hovertemplate="MdBs: %{x}",
    )
    fig.add_trace(gesamt_bar)

    # Foreground bar: rows found in ``dataframe``, annotated with the share.
    treffer_bar = Bar(
        x=[treffer],
        marker=dict(color=[farbe], line_color="black"),
        hovertemplate="MdBs mit Twitter: %{x}",
        text=f"{partei}-Mitglieder mit Twitter: {anteil} %",
        textposition="auto",
    )
    fig.add_trace(treffer_bar)

    fig.update_layout(
        barmode="overlay",
        margin={"t": 0, "r": 0, "l": 10, "b": 5},
        plot_bgcolor=bg_col,
        showlegend=False,
        paper_bgcolor=bg_col,
        height=100,
    )

    fig.update_yaxes(visible=False, fixedrange=True)
    fig.update_xaxes(visible=False, fixedrange=True)
    return fig
Beispiel #12
0
def main():
    """
    Compute all the arrays and build the charts.

    Three images are written under ``images/``: the scores of each variation
    of the method, the profiles of the lowest-scoring company per variation
    next to the reference profile, and a dendrogram that includes the
    reference.
    """
    margin = {'t': 20, 'r': 20, 'b': 20, 'l': 20}

    def export(fig, path):
        # Every image shares the same margins and pixel size.
        fig.update_layout(margin=dict(margin))
        fig.write_image(path, width=1200, height=600)

    names = ['alpha', 'beta', 'gamma', 'delta']
    companies = array([[67, 57, 49, 81, 63], [73, 59, 41, 87, 59],
                       [65, 57, 43, 77, 63], [67, 55, 87, 73, 63]])
    z = scale(companies)
    weights = linspace(1, 5, 5)
    variations = [
        ('Незважені', score(z)),
        ('Зважені ненормалізовані', score(z, weights)),
        ('Зважені нормалізовані', score(z, weights / sum(weights))),
    ]

    # Chart of the scores for each variation of the taxonometric method.
    figure = Figure()
    for label, scores in variations:
        figure.add_trace(Bar(name=label, x=names, y=scores))
    export(figure, 'images/scores.png')

    features = ['досвід', 'фінанси', 'іновації', 'динаміка', 'стабільність']
    standard = max(companies, 0)

    # Chart of the profiles picked by each variation, plus the reference.
    figure = Figure()
    for label, scores in variations:
        figure.add_trace(
            Bar(name=label, x=features, y=companies[argmin(scores)]))
    figure.add_trace(Bar(name='Еталон', x=features, y=standard))
    export(figure, 'images/profiles.png')

    # Dendrogram relative to the reference solution.
    figure = create_dendrogram(
        append(companies, [standard], 0),
        orientation='left',
        labels=names + ['standard'])
    export(figure, 'images/dendrogram.png')
Beispiel #13
0
def submit():
    """Plot the three submitted name/age pairs and link to the result.

    Reads ``name1..3`` and ``age1..3`` from the submitted form, sends them to
    ``plot`` as a single bar trace, and renders a page that links to whatever
    ``plot`` returned.

    Returns:
        The rendered HTML when ``plot`` returns a truthy value; otherwise
        ``None`` (implicit), exactly as before.
    """
    # grab data from form
    names = [request.forms.get('name{}'.format(i)) for i in (1, 2, 3)]
    ages = [request.forms.get('age{}'.format(i)) for i in (1, 2, 3)]

    data = [Bar(x=names, y=ages)]

    # make api call
    response = plot(data, filename='basic-bar', auto_open=True)

    if response:
        # The anchor tag previously lacked its closing '>' and wrapped no
        # text, so the link was malformed HTML.
        return template('''
                <h1>Congrats!</h1>
                <div>
                  View your graph here: <a href="{{response}}">{{response}}</a>
                </div>
            ''',
                        response=response)
Beispiel #14
0
    def countries_bar_chart(self, column: str, count: int=None, exclusions: list=None) -> Figure:
        """Build a log-scale bar chart of the top N countries for ``column``.

        Args:
            column: Column passed to ``self.per_country_max_df`` and plotted
                as the bar heights and bar labels.
            count: Keep only the first ``count`` rows of that frame. ``None``
                keeps every row.
            exclusions: Index labels to remove before taking the first
                ``count`` rows. ``None`` removes nothing.

        Returns:
            A ``Figure`` with a single ``Bar`` trace and a descriptive title.
        """

        # NOTE(review): presumably per_country_max_df returns rows already
        # ordered best-first, since head() is used as "top N" -- confirm.
        max_df = self.per_country_max_df(column)
        if exclusions is not None:
            max_df = max_df[~max_df.index.isin(exclusions)]
        if count is not None:
            max_df = max_df.head(count)

        fig = Figure(Bar(
            x=max_df.index,
            y=max_df[column],
            text=max_df[column],
            textposition="outside",
        ))

        # Without a limit the title used to read "Top None Countries".
        if count is None:
            title = "All Countries by \"{}\" Column".format(column)
        else:
            title = "Top {} Countries by \"{}\" Column".format(count, column)
        if exclusions is not None:
            title = "{} Excluding {}".format(title, exclusions)

        fig.update_layout(yaxis_type="log", title_text=title)

        return fig
Beispiel #15
0
def graphs():
    """Render ``graphs.html`` with a correlation heatmap and a bar chart.

    The heatmap shows ``df.corr()``. The bar chart plots the module-level
    ``values`` and their cumulative sum at positions 0-4, with the x ticks
    labelled by the module-level ``columns``.

    Returns:
        The rendered ``graphs.html`` template.
    """
    print(df.head())

    # Label the heatmap from the correlation matrix itself, so the labels
    # stay aligned with ``z`` even if ``corr()`` omits some columns of ``df``.
    correlation = df.corr()

    # Positions shared by both bar traces and by the x-axis ticks.
    positions = np.arange(5)

    graphs = [{
        "data": [{
            "x": correlation.columns,
            "y": correlation.index,
            "z": correlation.values,
            "type": "heatmap",
        }],
        "layout": {
            'title': 'Correlation between the features and label',
            'yaxis': {
                'title': "Features and label",
                'titlefont': {
                    'size': 10
                },
                'tickfont': {
                    'size': 8
                }
            },
            'xaxis': {
                'title': "Features and label",
                'titlefont': {
                    'size': 13
                },
                'tickfont': {
                    'size': 8
                }
            }
        }
    }, {
        'data': [
            Bar(x=positions, y=values, name='Feature'),
            Bar(x=positions,
                y=np.cumsum(values),
                name='Cummulative Features')
        ],
        'layout': {
            'title': 'Feature Importance',
            'yaxis': {
                'title': '% of explanation',
                'titlefont': {
                    'size': 10
                },
                'tickfont': {
                    'size': 8
                }
            },
            'xaxis': {
                'title': 'Feature',
                'titlefont': {
                    'size': 13
                },
                'tickfont': {
                    'size': 8
                },
                'tickvals': positions,
                'ticktext': columns
            }
        }
    }]

    # One DOM id per chart, plus the JSON payload for the template.
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # This will render the graphs.html Please see that file.
    return render_template('graphs.html', graphJSON=graphJSON, ids=ids)
Beispiel #16
0
from die import Die

# Create a D6 and a D10.
die_1 = Die()
die_2 = Die(10)

# Roll both dice 50,000 times and keep the sum of each pair of rolls.
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]

# Count how often every possible sum came up (the smallest sum is 2).
max_result = die_1.num_sides + die_2.num_sides
x_values = list(range(2, max_result + 1))
frequencies = [results.count(total) for total in x_values]

# Visualize the results.
data = [Bar(x=x_values, y=frequencies)]

x_axis_config = dict(title='Result', dtick=1)
y_axis_config = dict(title='Frequency of Result')
my_layout = Layout(
    title='Results of rolling a D6 and a D10 50000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)
offline.plot(dict(data=data, layout=my_layout), filename='d6_d10.html')
Beispiel #17
0
def makePlot_Total(code):
    """Compare a country's ten highest median risks with the global median.

    Args:
        code: Selection payload with a non-empty ``'points'`` list whose last
            entry holds the country code under ``'location'``. A falsy value
            yields the placeholder returned by ``waitMake``.

    Returns:
        A one-element list with the figure built by ``upFigure``, or the
        ``waitMake`` placeholder when nothing is selected.
    """
    if not code:
        return waitMake(
            'Select Country from Map',
            'Prominent Risks in Selected Country vs. the Global Median')

    # Read the last point without popping it, so the caller's payload is not
    # modified as a side effect.
    code = code['points'][-1]['location']
    name = data['cnmap'][code] if code else 'Country'

    risks = data['risks']
    risks = risks[risks.nick != 'Total']
    final = risks.date.max()

    # Rows of the selected country at the most recent date.
    latest_country = risks[risks.date.eq(final) & risks.code.eq(code)]

    median_country = latest_country.groupby('nick').perc.median().sort_values(
        ascending=False)

    # The ten risks with the highest median for this country.
    top_nicks = median_country.index.tolist()[:10]

    # Same date, same ten risks, all countries.
    latest_world = risks[risks.date.eq(final) & risks.nick.isin(top_nicks)]

    median_world = latest_world.groupby('nick').perc.median()

    x0 = top_nicks
    y0 = median_country.values.tolist()[:10]
    x1 = median_world.index.tolist()
    y1 = median_world.values.tolist()

    # Horizontal bars: values go on x, risk names on y.
    trace0 = Bar(
        x=y0,
        y=x0,
        marker={
            'color': 'rgba(50, 171, 96, 0.6)',
            'line': {
                'color': 'rgba(50, 171, 96, 1.0)',
                'width': 1
            }
        },
        name=f'{name}',
        orientation='h',
    )

    trace1 = Bar(
        x=y1,
        y=x1,
        marker={
            'color': 'rgba(170, 131, 126, 0.6)',
            'line': {
                'color': 'rgba(70, 71, 196, 1.0)',
                'width': 1
            }
        },
        name='Global Median',
        orientation='h',
    )

    # The bold tags are now properly closed (previously '<b>...<b>').
    layout = Layout(
        title=f'<b>Prominent Risks in {name} vs. the Global Median</b>',
        yaxis_showgrid=False,
        yaxis_showline=False,
        yaxis_showticklabels=True,

        xaxis_title='<b>Relative Risk Exposure</b>',
        xaxis_zeroline=False,
        xaxis_showline=False,
        xaxis_showticklabels=True,

        barmode='group',
    )

    return [
        upFigure(traces=[trace0, trace1],
                 layout=layout,
                 margin=(60, 40, 140, 160))
    ]
def index():
    """Render ``master.html`` with category, genre and LSA charts.

    Builds three charts: the total of every column of ``df`` from the fifth
    onward, the message count per genre, and a scatter plot of the two LSA
    scores in ``df_lsa`` with one trace per genre label.
    """
    col = df.columns[4:].to_list()
    categories_counts = df[col].sum()
    categories_names = list(categories_counts.index)

    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # One scatter trace per genre; trace order is direct, social, news.
    genre_colors = {'direct': 'green', 'news': 'blue', 'social': 'red'}
    lsa_traces = []
    for genre in ('direct', 'social', 'news'):
        subset = df_lsa[df_lsa['Labels'] == genre]
        lsa_traces.append(
            Scatter(x=subset['Lsa Score 1'],
                    y=subset['Lsa Score 2'],
                    mode='markers',
                    name='{} messages'.format(genre),
                    text='Genre:' + subset['Labels'] + '<br>' +
                    'Message:' + subset['Message'],
                    marker=dict(symbol='triangle-left',
                                color=subset['Labels'].map(genre_colors))))

    # create visuals
    graphs = [{
        'data': [Bar(x=categories_names, y=categories_counts)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Categories"
            }
        }
    }, {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': lsa_traces,
        'layout': {
            'showlegend': True,
            'title':
            'Latent Semantic Analysis (LSA - 2 components) of the Messages by Genres',
            'width': 1200,
            'height': 1000,
            # x plots 'Lsa Score 1' and y plots 'Lsa Score 2'; the axis
            # titles were previously the other way round.
            'xaxis': {
                'title': "LSA Component 1"
            },
            'yaxis': {
                'title': "LSA Component 2"
            },
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
Beispiel #19
0
from plotly import subplots

# Today's date formatted like a report directory name.
# NOTE(review): this value is not used below -- the report directory is
# hard-coded to '2020_11_03'; confirm whether that is intentional.
daily_rep_path = datetime.now().strftime("%Y_%m_%d")
path = os.path.join(os.getcwd(), 'reports', '2020_11_03')

# Semicolon-separated statistics for that report.
data = pd.read_csv(os.path.join(path, 'stats.csv'), sep=';')

# Histogram trace over the "Time" and "Count" columns.
occupation_rep = Histogram(
    x=data["Time"],
    y=data["Count"],
    marker_color='lightcoral',
    name='Occupancy',
    opacity=0.7,
)

# Horizontal bars with fixed counts per sex.
sex_rep = Bar(
    x=[6, 3],
    y=['Females', 'Males'],
    width=0.7,
    marker_color='cadetblue',
    orientation='h',
    name='Sex',
)

# Age histogram in one-unit bins from 0 to 100.
age_rep = Histogram(
    x=data["Age"],
    name='Age',
    opacity=0.7,
    xbins={'start': 0, 'end': 100, 'size': 1},
)

# "Age" plotted against "Time".
age_time_rep = Bar(x=data["Time"], y=data["Age"])

# Donut chart with fixed violation counts.
viola_rep = Pie(
    values=[24, 36],
    labels=['Mask', 'No mask'],
    hole=0.3,
    name='Violations',
)
data_to_plot = subplots.make_subplots(
    rows=3,
    cols=2,
Beispiel #20
0
def index():
    """Render ``master.html`` with a genre chart and per-genre subplots.

    The first chart shows the message count per genre. The second is a column
    of subplots, one per distinct genre, each showing the summed value of
    every melted column for that genre.
    """
    # data for the distribution graph
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # data for the subplots graph
    genre_unique = df['genre'].unique()
    n_rows = genre_unique.shape[0]
    plotting_df = pd.melt(df, id_vars=['genre'], value_vars=df.columns[3:])
    plotting_df = plotting_df.groupby(['genre',
                                       'variable']).sum().reset_index()

    # graph number 2: one subplot row per genre
    fig1 = make_subplots(rows=n_rows,
                         cols=1,
                         print_grid=False,
                         subplot_titles=genre_unique)

    for row, genre in enumerate(genre_unique, start=1):
        data = plotting_df[plotting_df['genre'] == genre]
        fig1.add_trace(Bar(x=data['variable'],
                           y=data['value'],
                           opacity=0.5,
                           marker=dict(color='#F1C40F')),
                       row=row,
                       col=1)

    # cleaning the layout of the graphs
    layout_custom = layout.Template(layout=Layout(
        titlefont=dict(size=24, color='#34495E')))

    fig1['layout'].update(title='Messages by genre and category',
                          showlegend=False,
                          template=layout_custom)

    # Style every subplot from the actual number of genres instead of
    # assuming exactly three. Only the bottom subplot keeps its x axis.
    for row in range(1, n_rows + 1):
        fig1['layout']['yaxis{}'.format(row)].update(hoverformat=',d',
                                                     tickformat=',d')
        if row < n_rows:
            fig1['layout']['xaxis{}'.format(row)].update(visible=False)

    # graph number 1
    graphs = [{
        'data': [
            Bar(x=genre_names,
                y=genre_counts,
                opacity=0.5,
                marker=dict(color='#F1C40F'))
        ],
        'layout': {
            'template': layout_custom,
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }]

    graphs.append(fig1)

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
Beispiel #21
0
def index():
    """Render ``master.html`` with aid-relatedness and label share charts.

    The first chart groups, per genre, the number of messages whose
    ``aid_related`` is 1 and 0. The second stacks, for every column other
    than ``id``, ``message``, ``original`` and ``genre``, the column total
    divided by the number of rows and its complement.
    """
    # All genres present in the data; both aid series are aligned to this
    # list so each bar is always paired with its own genre label.
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # genre and aid_related status; a genre with no matching message gets 0
    # instead of silently shifting the remaining bars
    aid_rel1 = df[df['aid_related'] == 1].groupby('genre').count()['message']
    aid_rel1 = aid_rel1.reindex(genre_names, fill_value=0)
    aid_rel0 = df[df['aid_related'] == 0].groupby('genre').count()['message']
    aid_rel0 = aid_rel0.reindex(genre_names, fill_value=0)

    # column total divided by the number of rows, for every label column
    class_distr1 = df.drop(['id', 'message', 'original', 'genre'],
                           axis=1).sum() / len(df)

    # sort the values in descending order
    class_distr1 = class_distr1.sort_values(ascending=False)

    # complement of each value above
    class_distr0 = 1 - class_distr1
    class_name = list(class_distr1.index)

    # create visuals
    graphs = [{
        'data': [
            Bar(x=genre_names, y=aid_rel1, name='Aid Related'),
            Bar(x=genre_names, y=aid_rel0, name='Not Aid Related')
        ],
        'layout': {
            'title': 'Message Genre and Aid Relativity',
            'yaxis': {
                'title': "Number of Messages"
            },
            'xaxis': {
                'title': "Genre"
            },
            'barmode': 'group'
        }
    }, {
        'data': [
            Bar(x=class_name, y=class_distr1, name='Class = 1'),
            Bar(x=class_name,
                y=class_distr0,
                name='Class = 0',
                marker=dict(color='rgb(212, 228, 247)'))
        ],
        'layout': {
            'title': 'Label Distribution',
            'yaxis': {
                'title': "Percentage"
            },
            'xaxis': {
                'title': "Label",
            },
            'barmode': 'stack'
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def return_figures(df):
    """Assemble the plotly figures for the dashboard.

    Three figures (class imbalance, message length, most used word) are built
    from precomputed files under ``data/`` and are left out when the
    corresponding file does not exist. The genre count and the category
    correlation heat map are always derived from ``df``.

    Args:
        df: DataFrame with a ``genre`` and a ``message`` column; every column
            from position 4 onwards is used for the correlation heat map.

    Returns:
        list (dict): one ``dict(data=..., layout=...)`` per figure, in the
        order the figures are built below.
    """
    charts = []

    # figure 1: ratio of 0 / 1 labels per category (precomputed)
    imbalance_file = 'data/df_class_imbalance.pkl'
    if os.path.isfile(imbalance_file):
        ratios = joblib.load(imbalance_file)
        ratio_columns = (
            ('0', 'Distribution ratio - 0'),
            ('1', 'Distribution ratio - 1'),
        )
        imbalance_traces = [
            Bar(name=label,
                x=ratios['Disaster message category'],
                y=ratios[column])
            for label, column in ratio_columns
        ]
        imbalance_layout = {
            'barmode': 'group',
            'title': 'Distribution ratio of messages for each disaster category',
            'yaxis': {'title': "Percentage"},
            'xaxis': {'title': "Category", 'tickangle': -45},
            'margin': {'b': 160},
        }
        charts.append({'data': imbalance_traces, 'layout': imbalance_layout})

    # figure 2: message length, one marker trace per genre (precomputed)
    length_file = 'data/df_word_length.pkl'
    if os.path.isfile(length_file):
        lengths = joblib.load(length_file)
        length_traces = []
        for genre in ('direct', 'news', 'social'):
            genre_rows = lengths[lengths['genre'] == genre]
            length_traces.append(
                Scatter(name=genre,
                        x=genre_rows['index'],
                        y=genre_rows['length'],
                        showlegend=True,
                        mode='markers'))
        length_layout = {
            'title': 'Average number of words per message',
            'yaxis': {'title': "Average length"},
            'xaxis': {'visible': False, 'title': "Message id"},
        }
        charts.append({'data': length_traces, 'layout': length_layout})

    # figure 3: number of messages per genre
    per_genre = df.groupby('genre').count()['message']
    genre_traces = [Bar(x=list(per_genre.index), y=per_genre)]
    genre_layout = {
        'title': 'Distribution of Message Genres',
        'yaxis': {'title': "Count"},
        'xaxis': {'title': "Genre"},
    }
    charts.append({'data': genre_traces, 'layout': genre_layout})

    # figure 4: pairwise correlation of the columns from position 4 onwards
    category_frame = df.iloc[:, 4:]
    corr_rows = [list(row) for row in category_frame.corr().values]
    heatmap_traces = [
        Heatmap(z=corr_rows,
                x=category_frame.columns,
                y=category_frame.columns,
                colorscale='Viridis')
    ]
    heatmap_layout = {
        'title': 'Correlation map of the categories',
        'height': 900,
        'margin': {'l': 130, 'b': 160},
    }
    charts.append({'data': heatmap_traces, 'layout': heatmap_layout})

    # figure 5: most frequent word per category (precomputed); bars are
    # coloured through the build_color_palette / get_color_scale helpers
    pop_word_file = 'data/df_pop_word.pkl'
    if os.path.isfile(pop_word_file):
        top_words = joblib.load(pop_word_file)
        distinct_words = top_words['first_word'].unique()
        palette = build_color_palette(distinct_words)
        bar_colors = get_color_scale(palette, top_words['first_word'])

        word_traces = [
            Bar(x=top_words['category'],
                y=top_words['first_word_count'],
                text=top_words['first_word'],
                textposition='outside',
                textangle=-60,
                textfont={'size': 10},
                marker={'color': bar_colors})
        ]
        word_layout = {
            'title': 'Most used word in all messages for each category',
            'yaxis': {'title': "Count of messages"},
            'xaxis': {'tickangle': -45, 'title': "Category"},
            'height': 900,
            'margin': {'l': 130, 'b': 140},
        }
        charts.append({'data': word_traces, 'layout': word_layout})

    return charts
Beispiel #23
0
def index():
    """Render ``master.html`` with three plotly graphs.

    The graphs are built from the module-level ``df``:

    1. a table with 10 randomly drawn messages (drawn with replacement) and,
       for each, the result of ``parse_colnames`` applied to that row's
       columns from position 4 onwards;
    2. a horizontal bar chart of the column sums from position 4 onwards,
       sorted in descending order, with underscores in the column names
       replaced by spaces;
    3. a bar chart of the number of messages per genre.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the graphs encoded as JSON.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # randint yields row *positions*, so the rows are selected with iloc.
    # Label-based lookups (df[...][idx] / df.loc[idx]) only give the same
    # rows when df's index happens to be exactly 0..n-1.
    random_idx = np.random.randint(0, df.shape[0], 10)
    sample = df.iloc[random_idx]
    random_tweets = sample['message'].tolist()
    categories = sample.iloc[:, 4:].apply(parse_colnames, axis=1).tolist()

    cats = df.iloc[:, 4:].sum().sort_values(ascending=False)
    values = cats.tolist()
    names = [name.replace('_', ' ') for name in cats.index.tolist()]

    # create visuals
    graphs = [{
        'data': [
            # header labels use properly closed <b>...</b> tags
            Table(header=dict(values=['<b>Tweet</b>', '<b>Categories</b>'],
                              fill_color='cornflowerblue',
                              line_color='darkslategray',
                              font=dict(color='white', size=16),
                              height=40),
                  cells=dict(values=[random_tweets, categories],
                             align=['left', 'center'],
                             line_color='darkslategray',
                             fill_color='whitesmoke'))
        ],
        'layout': {
            'title':
            'Sample of Input Data',
            'annotations': [{
                'text': "Refresh page for more",
                'font': {
                    'size': 13
                },
                'showarrow': False,
                'align': 'center',
                'x': 0.5,
                'y': 1.15,
                'xref': 'paper',
                'yref': 'paper',
            }]
        }
    }, {
        'data': [Bar(y=names, x=values, orientation="h")],
        'layout': {
            'title': 'Category Frequencies',
            'height': 800,
            'yaxis': {
                'tickangle': 0
            },
            'margin': {
                'l': 200
            }
        }
    }, {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            # NOTE(review): the title says "Categories" but this chart plots
            # message counts per genre - confirm the intended wording.
            'title': 'Distribution of Tweet Categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
Beispiel #24
0
def index():
    """Render ``master.html`` with three plotly graphs built from ``df``.

    The graphs are, in order: message count per genre, the per-column sums of
    the columns from position 4 onwards (sorted descending), and the pairwise
    correlation of those same columns as a heat map. Underscores in column
    names are replaced by spaces for display.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the graphs encoded as JSON.
    """
    # message count per genre
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # columns from position 4 onwards, coerced to numeric once and reused
    numeric_labels = df.iloc[:, 4:].apply(pd.to_numeric)

    categories = numeric_labels.sum().sort_values(ascending=False)
    categories_name = [col.replace('_', ' ') for col in categories.index]

    correlation = numeric_labels.corr()
    correlation_labels = [col.replace('_', ' ') for col in correlation.index]

    # one dict per graph, assembled into the list the template expects
    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    category_graph = {
        'data': [Bar(x=categories_name, y=categories)],
        'layout': {
            'title': 'Categories Frequency',
            'yaxis': {'title': "Count"},
            'xaxis': {'automargin': True, 'tickangle': -45},
        },
    }
    correlation_graph = {
        'data': [
            Heatmap(x=correlation_labels,
                    y=correlation_labels,
                    z=correlation)
        ],
        'layout': {
            'title': 'Categories Correlation',
            'height': 800,
            'yaxis': {'automargin': True},
            'xaxis': {'automargin': True},
        },
    }
    graphs = [genre_graph, category_graph, correlation_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)