def return_figures():
    """Build the plotly figures for the dashboard.

    Reads the module-level ``df`` and produces three figures: a pie chart
    of messages per genre, a bar chart of the ten most common tokens over
    all messages, and a bar chart of the mean of every column from
    position 4 onward.

    Args:
        None

    Returns:
        list (dict): one ``dict(data=..., layout=...)`` entry per figure
    """
    # figure one: message count per genre
    per_genre = df.groupby('genre').count()['message']
    pie_traces = [Pie(labels=list(per_genre.index), values=per_genre)]
    pie_layout = dict(title='Distribution of Message Genres')

    # figure two: the ten most frequent tokens in the whole corpus
    tokens = tokenize(" ".join(df["message"]))
    top_ten = Counter(tokens).most_common(10)
    words = [word for word, _ in top_ten]
    count = [occurrences for _, occurrences in top_ten]
    word_traces = [Bar(x=words, y=count)]
    word_layout = dict(title='Top 10 Most common words in messages',
                       yaxis=dict(title="counts"))

    # figure three: column means, largest first
    proportions = df[df.columns[4:]].mean().sort_values(ascending=False)
    category_traces = [Bar(x=list(proportions.index), y=proportions)]
    category_layout = dict(
        title='Categorie Distribution of Disaster Response',
        yaxis=dict(title="Proportion"))

    return [
        dict(data=pie_traces, layout=pie_layout),
        dict(data=word_traces, layout=word_layout),
        dict(data=category_traces, layout=category_layout),
    ]
def index():
    """Render ``master.html`` with three bar charts built from ``df``.

    The charts are: message count per genre, the five categories with the
    highest percentage of messages, and the five with the lowest. The
    category slice starts at column position 5.
    """
    # genre chart (the default example)
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # percent of messages per category, sorted ascending
    # (the original comment says this excludes "Related")
    percent = df.iloc[:, 5:].sum(axis=0).sort_values() / (0.01 * df.shape[0])
    most_common = percent[-5:]
    least_common = percent[0:5]

    graphs = [
        dict(
            data=[Bar(x=genre_labels, y=per_genre)],
            layout=dict(title='Distribution of Message Genres',
                        xaxis=dict(title='Genre'),
                        yaxis=dict(title='Message Count')),
        ),
        # top five categories by %
        dict(
            data=[Bar(x=list(most_common.index),
                      y=list(most_common.values),
                      marker_color='green')],
            layout=dict(
                title='Five Most-Identified Categories (excludes Related)',
                xaxis=dict(title='Category'),
                yaxis=dict(title='Percent of Messages')),
        ),
        # bottom five categories by %
        dict(
            data=[Bar(x=list(least_common.index),
                      y=list(least_common.values),
                      marker_color='red')],
            layout=dict(title='Five Least-Identified Categories',
                        xaxis=dict(title='Category'),
                        yaxis=dict(title='Percent of Messages')),
        ),
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def whatevers_bar_chart(self, whatever: str="total") -> Figure:
    """
    Stacked bar chart of active, deaths and recovered values.

    Args:
        whatever: prefix of the columns to plot. The column names are
            built as ``"<Whatever> <Column>"`` (for example
            ``"Total Active"``), so ``self.main_df`` must contain them.
            When it equals ``"daily"`` the first aggregated value of each
            series is dropped.

    Returns:
        A ``Figure`` with one stacked ``Bar`` trace per column, titled
        with the capitalized plural of ``whatever``.
    """
    columns = ["active", "deaths", "recovered"]
    dates = self.main_df.reset_index()["Date"]
    color = {
        "active": "#ffb347",  # Pastel Orange
        "deaths": "#ff6961",  # Pastel Red
        "recovered": "#77dd78",  # Pastel Green
    }
    engine = inflect.engine()
    fig = Figure()
    for column in columns:
        name = "{} {}".format(whatever.capitalize(), column.capitalize())
        # ``Series.sum(level=1)`` was removed in pandas 2.0. Grouping on
        # the same index level with sort=False is what it did internally,
        # so this keeps the result and its ordering on every version.
        ys = self.main_df[name].groupby(level=1, sort=False).sum()
        if whatever == "daily":
            # NOTE(review): ``dates`` is not trimmed to match, so x and y
            # differ in length here -- confirm this is intended.
            ys = ys[1:]
        fig.add_trace(Bar(name=name, x=dates, y=ys,
                          marker={"color": color[column]}))
    fig.update_layout(barmode="stack",
                      title_text=engine.plural(whatever).capitalize())
    fig.update_traces(marker_line_width=0)
    return fig
def generate_traces(df):
    """Return one ``Bar`` trace per column of ``df`` for a stacked bar chart.

    Every trace uses the frame's index as x, the column's values as y and
    the column label as the trace name.
    """
    return [
        Bar(x=list(df.index), y=df.iloc[:, position], name=label)
        for position, label in enumerate(df.columns)
    ]
def generate_message_categories_distribution_bar_chart():
    """
    create a graph for distribution of the messages.

    Sums every column of the module-level ``df_categories``, orders the
    totals from largest to smallest and hands the resulting bar trace to
    ``generate_graph_with_template``.

    Returns:
        Whatever ``generate_graph_with_template`` returns for the trace.
    """
    totals = df_categories.sum().sort_values(ascending=False)
    # Take the labels from the sorted totals. Using
    # ``df_categories.columns`` here would pair the sorted values with
    # labels in the original column order and mislabel every bar.
    data = Bar(x=list(totals.index), y=list(totals))
    title = 'Distribution of Message Categories'
    y_title = 'Count'
    x_title = 'Category'
    return generate_graph_with_template(data, title, y_title, x_title)
def generate_message_genres_bar_chart():
    """
    Build the bar graph of message counts per `genre`.

    Counts the non-null ``message`` values of the module-level ``df`` for
    each genre and passes the trace to ``generate_graph_with_template``.
    """
    per_genre = df.groupby('genre').count()['message']
    trace = Bar(x=list(per_genre.index), y=per_genre)
    return generate_graph_with_template(
        trace,
        'Distribution of Message Genres',
        'Count',
        'Genre',
    )
def index():
    """Render ``master.html`` with two charts built from ``df``.

    The first chart shows the column sums from position 4 onward, largest
    first. The second is a stacked bar chart of the same sums computed
    separately for the "direct", "news" and "social" genres.
    """
    # per-category totals, largest first
    per_category = df.iloc[:, 4:].sum().sort_values(ascending=False)
    category_labels = list(per_category.index)

    # one row of category totals per genre
    genres = ["direct", "news", "social"]
    genre_df = pd.DataFrame(
        [df[df["genre"] == genre].iloc[:, 4:].sum() for genre in genres],
        index=genres,
    )

    # one stacked trace per category column, with genres on the x axis
    stacked_traces = [
        Bar(x=list(genre_df.index), y=genre_df.iloc[:, position], name=label)
        for position, label in enumerate(genre_df.columns)
    ]

    # creating visuals
    category_graph = {
        'data': [Bar(x=category_labels, y=per_category)],
        'layout': {
            'title': 'Messages per Category',
            'yaxis': {'title': "Number of Messages"},
            'xaxis': {"tickangle": 45, "automargin": True},
        },
    }
    genre_graph = {
        'data': stacked_traces,
        'layout': {
            'barmode': 'stack',
            'title': 'Messages per Genre',
            'yaxis': {'title': "Number of Messages"},
        },
    }
    graphs = [category_graph, genre_graph]

    # encoding plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def create_figures():
    """Create the figures for plotly.

    Uses the module-level ``df`` and ``df_lang`` to build three bar
    charts: messages per genre (ascending), messages with and without a
    value in ``original``, and the first ten rows of ``df_lang``.

    Returns:
        List of Figures: one ``dict(data=..., layout=...)`` per chart
    """
    # chart one: messages per genre, smallest first
    per_genre = df.groupby('genre').count()['message'].sort_values()
    genre_chart = dict(
        data=[Bar(x=list(per_genre.index), y=per_genre)],
        layout=dict(title='Genres of Words',
                    xaxis=dict(title='Genre'),
                    yaxis=dict(title='Number of Messages')),
    )

    # chart two: rows without / with a value in ``original``
    # NOTE(review): rows where ``original`` is missing are labelled
    # 'Not English' -- confirm the two labels are not swapped.
    missing_original = df.original.isna().sum()
    translation_chart = dict(
        data=[Bar(x=['Not English', 'English'],
                  y=[missing_original, len(df) - missing_original])],
        layout=dict(title='Translated Messages',
                    xaxis=dict(title='Translation'),
                    yaxis=dict(title='Number of Messages')),
    )

    # chart three: first ten entries of the language table
    language_chart = dict(
        data=[Bar(x=df_lang.lang[:10], y=df_lang.counts[:10])],
        layout=dict(title='Messages\' Language',
                    xaxis=dict(title='Language'),
                    yaxis=dict(title='Number of Messages')),
    )

    return [genre_chart, translation_chart, language_chart]
def index():
    """Render ``master.html`` with genre and category distribution charts.

    Category columns are every column of ``df`` other than ``message``,
    ``original``, ``genre`` and ``id``. Their labels are shown with
    underscores replaced by spaces and in title case.
    """
    # messages per genre
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # per-category totals with human-readable labels
    non_category = ["message", "original", "genre", "id"]
    category_columns = df.drop(columns=non_category).columns
    category_labels = category_columns.str.replace("_", " ").str.title()
    category_totals = df[category_columns].sum(axis=0)

    genre_graph = {
        'data': [Bar(x=genre_labels, y=per_genre)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    category_graph = {
        'data': [Bar(x=category_labels, y=category_totals)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Count"},
        },
    }
    graphs = [genre_graph, category_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def get_figure_of_graph_bar_plot_number_of_scenes(df, xaxis_range=None, title=None):
    """Build a grouped bar figure of the number of scenes per period.

    One ``Bar`` trace is created for each distinct value of the
    ``collection`` column, restricted to the rows selected by
    ``__get_logical_date_range``.

    Args:
        df: frame with at least the columns ``collection``, ``number``
            and ``year_month``. It is copied, not modified.
        xaxis_range: range forwarded to ``__get_logical_date_range``.
            ``None`` (the default) is treated as an empty list.
        title: figure title, or ``None`` for no title.

    Returns:
        A ``Figure`` with grouped bars, a height of 800 and x tick labels
        rotated by -45 degrees.
    """
    # A ``[]`` default would be one list object shared by every call, so
    # use None as the sentinel and create a fresh list per call.
    if xaxis_range is None:
        xaxis_range = []

    figure_height = 800

    df_copy = df.copy()

    # Lazy %-style arguments produce the same messages but skip the
    # formatting work when INFO logging is disabled.
    logging.info(
        'get_figure_of_graph_bar_plot_number_of_scenes - df_copy.head(): \n%s\n',
        df_copy.head())
    logging.info(
        'get_figure_of_graph_bar_plot_number_of_scenes - xaxis_range: %s\n',
        xaxis_range)

    logical_date_range = __get_logical_date_range(df_copy, xaxis_range)

    # build the `data` parameter of `Figure`: one bar trace per dataset
    data = []
    for dataset in df_copy['collection'].unique():
        sub_df = df_copy[(df_copy['collection'] == dataset) & logical_date_range]

        hovertext = 'Number of Scenes: ' + sub_df['number'].map(str) + '<br>' + \
                    'Period: ' + sub_df['year_month'].map(str) + '<br>' + \
                    'Dataset: ' + sub_df['collection'].map(str)

        data.append(Bar({
            'x': sub_df['year_month'],
            'y': sub_df['number'],
            'name': dataset,
            'text': sub_df['number'],  # text inside the bar
            'textposition': 'auto',
            'hovertext': hovertext,
        }))

    fig = Figure({
        'data': data,
        'layout': {
            'title': title,
            'xaxis': {'title': 'Period'},
            'yaxis': {'title': 'Number of scenes'},
            'plot_bgcolor': colors['background'],
            'paper_bgcolor': colors['background'],
            'font': {
                'color': colors['text']
            }
        }
    })

    fig.update_layout(
        barmode='group',
        height=figure_height,
        xaxis_tickangle=-45
    )

    return fig
def horizontal(dataframe, partei, c_map):
    """Build a slim overlay bar comparing Twitter users with total seats.

    Args:
        dataframe: frame with a ``partei`` column, one row per member
            with a Twitter account (presumably -- verify against caller).
        partei: key into the seat table below; ``"bundestag"`` selects
            every row of ``dataframe``.
        c_map: mapping from ``partei`` to its bar colour.

    Returns:
        A 100-pixel-high figure with two overlaid bars, hidden axes and
        no legend.
    """
    seats = {
        "bundestag": 709,
        "spd": 152,
        "cdu": 200,
        "fdp": 80,
        "afd": 88,
        "linke": 69,
        "gruene": 67,
        "csu": 46,
    }

    with_twitter = sum(dataframe.partei == partei)
    if partei == "bundestag":
        with_twitter = len(dataframe)
    total_seats = seats[partei]
    prozent = int(with_twitter / total_seats * 100)
    party_colour = c_map[partei]

    fig = px.bar(color=[party_colour], opacity=0.5)

    # background bar: all seats, semi-transparent
    all_seats_bar = Bar(
        x=[total_seats],
        marker=dict(color=party_colour, opacity=0.5, line_color="black"),
        hovertemplate="MdBs: %{x}",
    )
    fig.add_trace(all_seats_bar)

    # foreground bar: members with a Twitter account
    twitter_bar = Bar(
        x=[with_twitter],
        marker=dict(color=[party_colour], line_color="black"),
        hovertemplate="MdBs mit Twitter: %{x}",
        text=f"{partei}-Mitglieder mit Twitter: {prozent} %",
        textposition="auto",
    )
    fig.add_trace(twitter_bar)

    fig.update_layout(
        barmode="overlay",
        margin={"t": 0, "r": 0, "l": 10, "b": 5},
        plot_bgcolor=bg_col,
        showlegend=False,
        paper_bgcolor=bg_col,
        height=100,
    )
    for update_axes in (fig.update_yaxes, fig.update_xaxes):
        update_axes(visible=False, fixedrange=True)
    return fig
def main():
    """
    Compute all the arrays and build the charts.

    Scores four companies on five features with three variants of the
    scoring (unweighted, weighted, weighted with normalized weights) and
    writes three images: ``images/scores.png``, ``images/profiles.png``
    and ``images/dendrogram.png``.
    """
    names = ['alpha', 'beta', 'gamma', 'delta']
    companies = array([[67, 57, 49, 81, 63],
                       [73, 59, 41, 87, 59],
                       [65, 57, 43, 77, 63],
                       [67, 55, 87, 73, 63]])

    z = scale(companies)
    weights = linspace(1, 5, 5)

    # (trace name, scores) for every variant, in plotting order
    variants = [
        ('Незважені', score(z)),
        ('Зважені ненормалізовані', score(z, weights)),
        ('Зважені нормалізовані', score(z, weights / sum(weights))),
    ]

    def export(fig, path):
        # every image shares the same margins and size
        fig.update_layout(margin={'t': 20, 'r': 20, 'b': 20, 'l': 20})
        fig.write_image(path, width=1200, height=600)

    # Chart of the scores for each variant of the taxonometric method.
    figure = Figure()
    for label, scores in variants:
        figure.add_trace(Bar(name=label, x=names, y=scores))
    export(figure, 'images/scores.png')

    features = ['досвід', 'фінанси', 'іновації', 'динаміка', 'стабільність']
    # presumably ``max`` is numpy's, giving the per-feature maximum
    # along axis 0 -- verify against the file's imports
    standard = max(companies, 0)

    # Chart of the profiles picked by each variant and of the standard.
    figure = Figure()
    for label, scores in variants:
        figure.add_trace(
            Bar(name=label, x=features, y=companies[argmin(scores)]))
    figure.add_trace(Bar(name='Еталон', x=features, y=standard))
    export(figure, 'images/profiles.png')

    # Dendrogram relative to the reference (standard) solution.
    figure = create_dendrogram(
        append(companies, [standard], 0),
        orientation='left',
        labels=names + ['standard'])
    export(figure, 'images/dendrogram.png')
def submit():
    """Plot the three submitted name/age pairs and link to the result.

    Reads ``name1``..``name3`` and ``age1``..``age3`` from the submitted
    form, sends them to ``plot`` as a single bar trace and, when ``plot``
    returns a truthy value, renders a page linking to it.

    Returns:
        The rendered HTML when ``plot`` returned a truthy response;
        otherwise ``None`` (implicitly).
    """
    # grab data from form
    names = [request.forms.get('name{}'.format(i)) for i in (1, 2, 3)]
    ages = [request.forms.get('age{}'.format(i)) for i in (1, 2, 3)]

    # NOTE(review): form values arrive as strings, so the ages are passed
    # on unconverted -- confirm whether numeric conversion is wanted.
    data = [Bar(x=names, y=ages)]

    # make api call
    response = plot(data, filename='basic-bar', auto_open=True)

    if response:
        # The anchor used to read `<a href="..."</a>...`. The opening tag
        # was never closed, so the link markup was broken.
        return template('''
            <h1>Congrats!</h1>
            <div>
              View your graph here: <a href="{{response}}">{{response}}</a>
            </div>
            ''', response=response)
def countries_bar_chart(self, column: str, count: int=None, exclusions: list=None) -> Figure:
    """
    Top N countries bar chart

    Args:
        column: column of ``self.per_country_max_df(column)`` to plot.
        count: number of leading rows to keep; ``None`` keeps them all.
        exclusions: index labels to drop before taking the leading rows;
            ``None`` drops nothing.

    Returns:
        A ``Figure`` with a single bar trace on a logarithmic y axis.
        Each bar is labelled with its value.
    """
    max_df = self.per_country_max_df(column)
    if exclusions is not None:
        max_df = max_df[~max_df.index.isin(exclusions)]
    if count is not None:
        max_df = max_df.head(count)

    fig = Figure(Bar(
        x=max_df.index,
        y=max_df[column],
        text=max_df[column],
        textposition="outside",
    ))

    # Without this branch the title read "Top None Countries ..." when
    # no count was given.
    if count is None:
        title = "Top Countries by \"{}\" Column".format(column)
    else:
        title = "Top {} Countries by \"{}\" Column".format(count, column)
    if exclusions is not None:
        title = "{} Excluding {}".format(title, exclusions)

    fig.update_layout(yaxis_type="log")
    fig.update_layout(title_text=title)
    return fig
def graphs():
    """Render ``graphs.html`` with a correlation heatmap and a bar chart.

    The heatmap shows ``df.corr()``. The bar chart plots the module-level
    ``values`` and their cumulative sum at positions 0..4, with the tick
    labels taken from the module-level ``columns``.

    Returns:
        The rendered ``graphs.html`` template, given the JSON-encoded
        graphs and their element ids.
    """
    print(df.head())

    # Label the heatmap from the correlation matrix itself so the axes
    # always match ``z``, even if ``corr`` does not cover every column
    # of ``df``.
    corr = df.corr()
    corr_labels = corr.columns

    # NOTE(review): five features are hard-coded here; ``values`` and
    # ``columns`` are presumably of length 5 -- confirm.
    positions = np.arange(5)

    def axis(title, title_size, **extra):
        # axis dict with the shared tick font and a per-axis title font
        spec = {
            'title': title,
            'titlefont': {'size': title_size},
            'tickfont': {'size': 8},
        }
        spec.update(extra)
        return spec

    graphs = [{
        "data": [{
            "x": corr_labels,
            "y": corr_labels,
            "z": corr.values,
            "type": "heatmap",
        }],
        "layout": {
            'title': 'Correlation between the features and label',
            'yaxis': axis("Features and label", 10),
            'xaxis': axis("Features and label", 13),
        }
    }, {
        'data': [
            Bar(x=positions, y=values, name='Feature'),
            Bar(x=positions, y=np.cumsum(values),
                name='Cummulative Features')
        ],
        'layout': {
            'title': 'Feature Importance',
            'yaxis': axis('% of explanation', 10),
            'xaxis': axis('Feature', 13,
                          tickvals=positions, ticktext=columns),
        }
    }]

    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # This will render the graphs.html Please see that file.
    return render_template('graphs.html', graphJSON=graphJSON, ids=ids)
from die import Die

# Roll a six-sided and a ten-sided die together many times and chart how
# often each total comes up.

# Create a D6 and a D10.
die_1 = Die()
die_2 = Die(10)

# Make some rolls, and store results in a list.
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]

# Analyze the results: how often every possible total occurred.
max_result = die_1.num_sides + die_2.num_sides
x_values = list(range(2, max_result + 1))
frequencies = [results.count(total) for total in range(2, max_result + 1)]

# Visualize the results.
data = [Bar(x=x_values, y=frequencies)]

x_axis_config = {'title': 'Result', 'dtick': 1}
y_axis_config = {'title': 'Frequency of Result'}
my_layout = Layout(
    title='Results of rolling a D6 and a D10 50000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config,
)
offline.plot({'data': data, 'layout': my_layout}, filename='d6_d10.html')
def makePlot_Total(code):
    """Compare a country's ten largest risks with the global median.

    Args:
        code: selection payload with a ``points`` list whose last entry
            has a ``location`` key. The last point is popped from the
            list. A falsy value yields the placeholder from ``waitMake``.

    Returns:
        The ``waitMake`` placeholder when nothing is selected; otherwise
        a one-element list holding the figure built by ``upFigure``.
    """
    if not code:
        return waitMake(
            'Select Country from Map',
            'Prominent Risks in Selected Country vs. the Global Median')

    code = code['points'].pop()['location']
    name = data['cnmap'][code] if code else 'Country'

    all_risks = data['risks']
    risks = all_risks[all_risks.nick != 'Total']
    is_latest = risks.date.eq(risks.date.max())

    # per-risk median for the selected country, largest first
    country_rows = risks[is_latest & risks.code.eq(code)]
    median_country = country_rows.groupby('nick').perc.median().sort_values(
        ascending=False)
    top_risks = median_country.index.tolist()[:10]

    # median of the same risks over every row at the latest date
    world_rows = risks[is_latest & risks.nick.isin(top_risks)]
    median_world = world_rows.groupby('nick').perc.median()

    def horizontal_bar(values, labels, fill, outline, label):
        # horizontal bar trace with a 1px outline
        return Bar(
            x=values,
            y=labels,
            marker={'color': fill, 'line': {'color': outline, 'width': 1}},
            name=label,
            orientation='h',
        )

    trace0 = horizontal_bar(
        median_country.values.tolist()[:10], top_risks,
        'rgba(50, 171, 96, 0.6)', 'rgba(50, 171, 96, 1.0)', f'{name}')
    trace1 = horizontal_bar(
        median_world.values.tolist(), median_world.index.tolist(),
        'rgba(170, 131, 126, 0.6)', 'rgba(70, 71, 196, 1.0)',
        'Global Median')

    layout = Layout(
        title=f'<b>Prominent Risks in {name} vs. the Global Median<b>',
        yaxis_showgrid=False,
        yaxis_showline=False,
        yaxis_showticklabels=True,
        xaxis_title='<b>Relative Risk Exposure<b>',
        xaxis_zeroline=False,
        xaxis_showline=False,
        xaxis_showticklabels=True,
        barmode='group',
    )

    return [
        upFigure(traces=[trace0, trace1], layout=layout,
                 margin=(60, 40, 140, 160))
    ]
def index():
    """Render ``master.html`` with category, genre and LSA charts.

    Builds three graphs: the column sums of ``df`` from position 4
    onward, the message count per genre, and a scatter plot of the two
    LSA scores in ``df_lsa`` with one trace per genre.

    Returns:
        The rendered ``master.html`` template, given the JSON-encoded
        graphs and their element ids.
    """
    # messages per category
    col = df.columns[4:].to_list()
    categories_counts = df[col].sum()
    categories_names = list(categories_counts.index)

    # messages per genre
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    genre_colors = {'direct': 'green', 'news': 'blue', 'social': 'red'}

    def lsa_trace(genre):
        # scatter of both LSA scores for the rows labelled with `genre`
        rows = df_lsa[df_lsa['Labels'] == genre]
        return Scatter(
            x=rows['Lsa Score 1'],
            y=rows['Lsa Score 2'],
            mode='markers',
            name='{} messages'.format(genre),
            text='Genre:' + rows['Labels'] + '<br>' + 'Message:' +
            rows['Message'],
            marker=dict(symbol='triangle-left',
                        color=rows['Labels'].map(genre_colors)))

    # create visuals
    graphs = [{
        'data': [Bar(x=categories_names, y=categories_counts)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Categories"}
        }
    }, {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"}
        }
    }, {
        'data': [lsa_trace(genre) for genre in ('direct', 'social', 'news')],
        'layout': {
            'showlegend': True,
            'title':
            'Latent Semantic Analysis (LSA - 2 components) of the Messages by Genres',
            'width': 1200,
            'height': 1000,
            # x plots "Lsa Score 1" and y plots "Lsa Score 2"; the two
            # titles used to be swapped.
            'xaxis': {'title': "LSA Component 1"},
            'yaxis': {'title': "LSA Component 2"},
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
from plotly import subplots daily_rep_path = datetime.now().strftime("%Y_%m_%d") path = os.getcwd() path = os.path.join(path, 'reports', '2020_11_03') data = pd.read_csv(os.path.join(path, 'stats.csv'), sep=';') occupation_rep = Histogram(x=data["Time"], y=data["Count"], marker_color='lightcoral', name='Occupancy', opacity=0.7) sex_rep = Bar(x=[6, 3], y=['Females', 'Males'], width=0.7, marker_color='cadetblue', orientation='h', name='Sex') age_rep = Histogram(x=data["Age"], name='Age', opacity=0.7, xbins=dict(start=0, end=100, size=1)) age_time_rep = Bar(x=data["Time"], y=data["Age"]) viola_rep = Pie(values=[24, 36], labels=['Mask', 'No mask'], hole=0.3, name='Violations') data_to_plot = subplots.make_subplots( rows=3, cols=2,
def index():
    """Render ``master.html`` with a genre chart and per-genre subplots.

    The first graph is the message count per genre. The second is a
    column of subplots, one row per distinct genre, each showing the
    summed value of every melted column for that genre.

    Returns:
        The rendered ``master.html`` template, given the JSON-encoded
        graphs and their element ids.
    """
    # data for the distribution graph
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # data for the subplots graph
    genre_unique = df['genre'].unique()
    n_genres = genre_unique.shape[0]
    # NOTE(review): the slice starts at column position 3 -- confirm it
    # is meant to begin at the first category column.
    plotting_df = pd.melt(df, id_vars=['genre'], value_vars=df.columns[3:])
    plotting_df = plotting_df.groupby(['genre',
                                       'variable']).sum().reset_index()

    # graph number 2: one subplot row per genre
    fig1 = make_subplots(rows=n_genres,
                         cols=1,
                         print_grid=False,
                         subplot_titles=genre_unique)
    for row, genre in enumerate(genre_unique, start=1):
        genre_rows = plotting_df[plotting_df['genre'] == genre]
        fig1.add_trace(Bar(x=genre_rows['variable'],
                           y=genre_rows['value'],
                           opacity=0.5,
                           marker=dict(color='#F1C40F')),
                       row=row,
                       col=1)

    # cleaning the layout of the graphs
    layout_custom = layout.Template(layout=Layout(
        titlefont=dict(size=24, color='#34495E')))
    fig1['layout'].update(title='Messages by genre and category',
                          showlegend=False,
                          template=layout_custom)
    # Format every y axis, and hide every x axis except the bottom one.
    # This used to be written out for exactly three genres.
    for row in range(1, n_genres + 1):
        fig1['layout']['yaxis{}'.format(row)].update(hoverformat=',d',
                                                     tickformat=',d')
        if row < n_genres:
            fig1['layout']['xaxis{}'.format(row)].update(visible=False)

    # graph number 1
    graphs = [{
        'data': [
            Bar(x=genre_names,
                y=genre_counts,
                opacity=0.5,
                marker=dict(color='#F1C40F'))
        ],
        'layout': {
            'template': layout_custom,
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"}
        }
    }]
    graphs.append(fig1)

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with aid-related and label charts.

    The first graph groups, per genre, the number of messages with
    ``aid_related`` equal to 1 and to 0. The second stacks, per label
    column, the share of rows equal to 1 and its complement.

    Returns:
        The rendered ``master.html`` template, given the JSON-encoded
        graphs and their element ids.
    """
    # every genre in the data; both aid series are aligned to this
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    def messages_per_genre(flag):
        # Plotly pairs x and y by position, so reindex to the full genre
        # list. A genre with no matching rows would otherwise shift the
        # bars under the wrong labels.
        selected = df[df['aid_related'] == flag]
        per_genre = selected.groupby('genre').count()['message']
        return per_genre.reindex(genre_names, fill_value=0)

    # genre and aid_related status
    aid_rel1 = messages_per_genre(1)
    aid_rel0 = messages_per_genre(0)

    # share of rows with 1 in each class column, largest first
    class_distr1 = df.drop(['id', 'message', 'original', 'genre'],
                           axis=1).sum() / len(df)
    class_distr1 = class_distr1.sort_values(ascending=False)

    # complementary share (rows with 0 in the class)
    class_distr0 = (class_distr1 - 1) * -1

    class_name = list(class_distr1.index)

    # create visuals
    graphs = [{
        'data': [
            Bar(x=genre_names, y=aid_rel1, name='Aid Related'),
            Bar(x=genre_names, y=aid_rel0, name='Not Aid Related')
        ],
        'layout': {
            'title': 'Message Genre and Aid Relativity',
            'yaxis': {'title': "Number of Messages"},
            'xaxis': {'title': "Genre"},
            'barmode': 'group'
        }
    }, {
        'data': [
            Bar(x=class_name, y=class_distr1, name='Class = 1'),
            Bar(x=class_name,
                y=class_distr0,
                name='Class = 0',
                marker=dict(color='rgb(212, 228, 247)'))
        ],
        'layout': {
            'title': 'Label Distribution',
            'yaxis': {'title': "Percentage"},
            'xaxis': {'title': "Label"},
            'barmode': 'stack'
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def return_figures(df):
    """Assemble the dashboard figures for ``df``.

    Three figures are optional and only added when their pickled input
    exists under ``data/``: class imbalance, average message length and
    most-used word per category. The genre distribution and the category
    correlation heat map are always built from ``df``.

    Args:
        df: frame with ``genre`` and ``message`` columns; the columns
            from position 4 onward are used for the correlation map.

    Returns:
        list of ``dict(data=..., layout=...)`` entries, in display order.
    """
    figures = []

    # visualize imbalance of classes
    if os.path.isfile('data/df_class_imbalance.pkl'):
        data_stat = joblib.load('data/df_class_imbalance.pkl')
        imbalance_traces = [
            Bar(name=label,
                x=data_stat['Disaster message category'],
                y=data_stat['Distribution ratio - ' + label])
            for label in ('0', '1')
        ]
        imbalance_layout = dict(
            barmode='group',
            title='Distribution ratio of messages for each disaster category',
            yaxis=dict(title="Percentage"),
            xaxis=dict(title="Category", tickangle=-45),
            margin=dict(b=160))
        figures.append(dict(data=imbalance_traces, layout=imbalance_layout))

    # visualize average message length, one marker trace per genre
    if os.path.isfile('data/df_word_length.pkl'):
        data_length = joblib.load('data/df_word_length.pkl')
        length_traces = []
        for genre in ('direct', 'news', 'social'):
            genre_rows = data_length[data_length['genre'] == genre]
            length_traces.append(
                Scatter(name=genre,
                        x=genre_rows['index'],
                        y=genre_rows['length'],
                        showlegend=True,
                        mode='markers'))
        length_layout = dict(title='Average number of words per message',
                             yaxis=dict(title="Average length"),
                             xaxis=dict(visible=False, title="Message id"))
        figures.append(dict(data=length_traces, layout=length_layout))

    # visualize count per genre
    per_genre = df.groupby('genre').count()['message']
    genre_traces = [Bar(x=list(per_genre.index), y=per_genre)]
    genre_layout = dict(title='Distribution of Message Genres',
                        yaxis=dict(title="Count"),
                        xaxis=dict(title="Genre"))
    figures.append(dict(data=genre_traces, layout=genre_layout))

    # visualize category correlation heat map
    data_corr = df.iloc[:, 4:]
    corr_rows = [list(row) for row in data_corr.corr().values]
    heatmap_traces = [
        Heatmap(z=corr_rows,
                x=data_corr.columns,
                y=data_corr.columns,
                colorscale='Viridis')
    ]
    heatmap_layout = dict(title='Correlation map of the categories',
                          height=900,
                          margin=dict(l=130, b=160))
    figures.append(dict(data=heatmap_traces, layout=heatmap_layout))

    # visualize most frequent word per category
    if os.path.isfile('data/df_pop_word.pkl'):
        pop_word = joblib.load('data/df_pop_word.pkl')
        palette = build_color_palette(pop_word['first_word'].unique())
        colors = get_color_scale(palette, pop_word['first_word'])
        word_traces = [
            Bar(x=pop_word['category'],
                y=pop_word['first_word_count'],
                text=pop_word['first_word'],
                textposition='outside',
                textangle=-60,
                textfont={'size': 10},
                marker={'color': colors})
        ]
        word_layout = dict(
            title='Most used word in all messages for each category',
            yaxis=dict(title="Count of messages"),
            xaxis=dict(tickangle=-45, title="Category"),
            height=900,
            margin=dict(l=130, b=140))
        figures.append(dict(data=word_traces, layout=word_layout))

    return figures
def index():
    """Render ``master.html`` with a sample table and two bar charts.

    Shows ten randomly drawn messages with their parsed categories, the
    per-category totals as a horizontal bar chart, and the message count
    per genre.
    """
    # messages per genre
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # ten random rows: the message text and its parsed category labels
    random_idx = np.random.randint(0, df.shape[0], 10)
    random_tweets = df['message'][random_idx].to_list()
    sample_rows = df.loc[random_idx, df.columns[4:]]
    categories = sample_rows.apply(parse_colnames, axis=1).tolist()

    # per-category totals, largest first, with readable labels
    totals = df.iloc[:, 4:].sum().sort_values(ascending=False)
    values = totals.tolist()
    names = [label.replace('_', ' ') for label in totals.index.tolist()]

    # create visuals
    sample_table = Table(
        header=dict(values=['<b>Tweet<b>', '<b>Categories<b>'],
                    fill_color='cornflowerblue',
                    line_color='darkslategray',
                    font=dict(color='white', size=16),
                    height=40),
        cells=dict(values=[random_tweets, categories],
                   align=['left', 'center'],
                   line_color='darkslategray',
                   fill_color='whitesmoke'))
    refresh_hint = {
        'text': "Refresh page for more",
        'font': {'size': 13},
        'showarrow': False,
        'align': 'center',
        'x': 0.5,
        'y': 1.15,
        'xref': 'paper',
        'yref': 'paper',
    }

    graphs = [
        {
            'data': [sample_table],
            'layout': {
                'title': 'Sample of Input Data',
                'annotations': [refresh_hint],
            },
        },
        {
            'data': [Bar(y=names, x=values, orientation="h")],
            'layout': {
                'title': 'Category Frequencies',
                'height': 800,
                'yaxis': {'tickangle': 0},
                'margin': {'l': 200},
            },
        },
        {
            'data': [Bar(x=genre_labels, y=per_genre)],
            'layout': {
                'title': 'Distribution of Tweet Categories',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category and correlation charts.

    The columns of ``df`` from position 4 onward are converted with
    ``pd.to_numeric`` and used for both the per-category totals (largest
    first) and the correlation heat map. Labels are shown with
    underscores replaced by spaces.
    """
    # messages per genre
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # numeric view of the category columns, shared by both charts below
    numeric_categories = df.iloc[:, 4:].apply(pd.to_numeric)

    totals = numeric_categories.sum().sort_values(ascending=False)
    total_labels = [label.replace('_', ' ') for label in list(totals.index)]

    correlation = numeric_categories.corr()
    correlation_labels = [
        label.replace('_', ' ') for label in correlation.index
    ]

    # create visuals
    genre_graph = {
        'data': [Bar(x=genre_labels, y=per_genre)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    frequency_graph = {
        'data': [Bar(x=total_labels, y=totals)],
        'layout': {
            'title': 'Categories Frequency',
            'yaxis': {'title': "Count"},
            'xaxis': {'automargin': True, 'tickangle': -45},
        },
    }
    correlation_graph = {
        'data': [
            Heatmap(x=correlation_labels,
                    y=correlation_labels,
                    z=correlation)
        ],
        'layout': {
            'title': 'Categories Correlation',
            'height': 800,
            'yaxis': {'automargin': True},
            'xaxis': {'automargin': True},
        },
    }
    graphs = [genre_graph, frequency_graph, correlation_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)