repo_links.append(repo_link) print(f"Name: {repo_dict['name']}") print(f"Owner: {repo_dict['owner']['login']}") print(f"Stars: {repo_dict['stargazers_count']}") print(f"Repository: {repo_dict['html_url']}") print(f"Created: {repo_dict['created_at']}") print(f"Updated: {repo_dict['updated_at']}") print(f"Description: {repo_dict['description']}") print('') data = [ Bar(x=repo_links, y=stars, text=labels, marker={ 'color': 'rgb(60, 100, 150)', 'line': { 'width': 1.5, 'color': 'rgb(25, 25, 25)' } }, opacity=0.6) ] my_layout = { 'title': 'Most-Starred JavaScript Projects on GitHub', 'xaxis': { 'title': 'Repository' }, 'yaxis': { 'title': 'Stars' }, }
def index():
    """Render the landing page with three genre-level bar charts.

    Reads the module-level ``df`` and plots, per genre, the number of
    messages, the mean of the ``medical_help`` column and the mean of the
    ``money`` column.

    Returns:
        The value of ``render_template('master.html', ...)``, given the graph
        ids and the Plotly figures serialized as JSON.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Both means are sorted by value, so their genre order can differ from
    # ``genre_names``.  Each chart below therefore takes its x labels from
    # the series it plots rather than reusing ``genre_names``.
    medical_help = df.groupby(by=['genre'])['medical_help'].mean().sort_values()
    money_help = df.groupby(by=['genre'])['money'].mean().sort_values()

    # create visuals
    graphs = [
        {
            'data': [
                Bar(
                    x=genre_names,
                    y=genre_counts
                )
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            'data': [
                Bar(
                    x=list(medical_help.index),
                    y=medical_help
                )
            ],
            'layout': {
                'title': 'Avg of Medical Help in Genres',
                'yaxis': {
                    # the plotted value is a mean, not a count
                    'title': "Average"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            'data': [
                Bar(
                    x=list(money_help.index),
                    y=money_help
                )
            ],
            'layout': {
                'title': 'Avg of Money Help in Genres',
                'yaxis': {
                    # the plotted value is a mean, not a count
                    'title': "Average"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        }
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
from collections import Counter

# Create two d6
die_1 = Die()
die_2 = Die()

# Make some rolls, and store results in a list.
results = [die_1.roll() * die_2.roll() for _ in range(1_000_000)]

# Analyze the results.
# Tally all products in one pass; calling results.count() once per possible
# value would rescan the whole list for each of them.
max_result = die_1.num_sides * die_2.num_sides
tally = Counter(results)
x_values = list(range(1, max_result + 1))
# A Counter yields 0 for products that never occurred.
frequencies = [tally[value] for value in x_values]

# Visualize the results.
data = [Bar(x=x_values, y=frequencies)]

# The 'dtick': 1 setting tells Plotly to label every tick mark.
x_axis_config = {'title': 'Result', 'dtick': 1}
y_axis_config = {'title': 'Frequency of Result'}
my_layout = Layout(
    title='Results of multiplying the roll results of two d6 1,000,000 times',
    xaxis=x_axis_config,
    yaxis=y_axis_config)
offline.plot({'data': data, 'layout': my_layout}, filename='d6Xd6.html')
def index():
    """Render the landing page with genre and category overview charts.

    Builds three bar charts from the module-level ``df``: messages per genre,
    the five largest category totals and the five smallest category totals
    (both horizontal), then hands them to the ``master.html`` template.
    """
    # message totals per genre
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # column totals of everything that is not a metadata column, ascending
    remove_col = ['id', 'message', 'original', 'genre']
    keep_mask = ~df.columns.isin(remove_col)
    y = df.loc[:, keep_mask]
    ascending_totals = y.sum().sort_values()

    # tail()/head() default to five rows
    category_counts_top5 = ascending_totals.tail()
    category_names_top5 = list(category_counts_top5.index)
    category_counts_bottom5 = ascending_totals.head()
    category_names_bottom5 = list(category_counts_bottom5.index)

    # one figure description per chart
    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    top5_graph = {
        'data': [
            Bar(y=category_names_top5, x=category_counts_top5,
                orientation='h')
        ],
        'layout': {
            'title': 'Top 5 Categories',
            'xaxis': {'title': "Count"},
        },
    }
    bottom5_graph = {
        'data': [
            Bar(y=category_names_bottom5, x=category_counts_bottom5,
                orientation='h')
        ],
        'layout': {
            'title': 'Least represented 5 Categories',
            'xaxis': {'title': "Count"},
        },
    }
    graphs = [genre_graph, top5_graph, bottom5_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
from collections import Counter

from die import Die
from plotly.graph_objs import Bar, Layout
from plotly import offline

# Number of times the pair of dice is rolled; also shown in the chart title
# so the two cannot drift apart.
num_rolls = 100_000

# six sided die
die_1 = Die()
die_2 = Die()

roll_results = [die_1.roll() + die_2.roll() for _ in range(num_rolls)]

# Tally all sums in one pass instead of rescanning the list per value.
max_result = die_1.num_sides + die_2.num_sides
tally = Counter(roll_results)
x_vals = list(range(2, max_result + 1))
# A Counter yields 0 for sums that never occurred.
freq = [tally[value] for value in x_vals]

data = [Bar(x=x_vals, y=freq)]

# 'dtick': 1 asks Plotly for a tick label on every result value.
x_ax_config = {'title': 'Result', 'dtick': 1}
y_ax_config = {'title': 'Frequency of Result'}
my_layout = Layout(title=f'Results of rolling two D6 {num_rolls:,} times',
                   xaxis=x_ax_config, yaxis=y_ax_config)
offline.plot({'data': data, 'layout': my_layout}, filename='d6_d6.html')
def index():
    """Render the landing page with summary charts of the training data.

    Builds three Plotly figures from the module-level ``df``: messages per
    genre (bar), flagged messages per category (bar) and the same category
    totals as a pie chart, then renders ``master.html`` with them.

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # extracting data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Per category column, count the messages whose value is 1 or 2.
    # The earlier lookup tested for the string '1' in the value counts but
    # then read the count with the integer key 1, so the test and the read
    # could never refer to the same label.  Both spellings are accepted here.
    # NOTE(review): assumes columns from position 4 onward are the category
    # columns -- confirm against how ``df`` is loaded.
    y = df.iloc[:, 4:]
    flagged_values = [1, 2, '1', '2']
    col_val = {col: int(y[col].isin(flagged_values).sum()) for col in y}

    # largest categories first
    col_val = sorted(col_val.items(), key=operator.itemgetter(1), reverse=True)
    col_names = [name for name, _ in col_val]
    col_counts = [count for _, count in col_val]

    # creating visuals using plotly
    graphs = [
        {
            # This bar plot is for knowing the distribution of messages with Genre
            'data': [
                Bar(
                    x=genre_names,
                    y=genre_counts
                )
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        # This bar plot is for knowing how messages spread across categories
        {
            'data': [
                Bar(
                    y=col_counts,
                    x=col_names,
                )
            ],
            'layout': {
                'title': 'Count of messages in each category',
                'yaxis': {
                    'title': "Count"
                },
            }
        },
        # This pie plot is for knowing how messages relative spread across categories
        {
            'data': [
                Pie(
                    labels=col_names,
                    values=col_counts,
                    textposition='inside'
                )
            ],
            'layout': {
                'title': ' Percentage distribution of message in each category',
                'orientation': 'h'
            }
        }
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre and category-proportion charts.

    Builds five bar charts from the module-level ``df``: messages per genre,
    the overall proportion of messages in each category, and that proportion
    within the ``direct``, ``news`` and ``social`` genres.

    Returns:
        The value of ``render_template('master.html', ...)``.

    Raises:
        KeyError: if one of the three genres is absent from ``df['genre']``.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Extract category_name proportions as a whole
    category_proportions = df.iloc[:, 4:].sum() / df.shape[0]
    category_names = list(df.columns[4:])

    # Extract category_name proportions based on three different genres.
    # Only the category columns are summed, and rows are picked by genre
    # label, so the result no longer depends on which other columns survive
    # the group-wise sum or on the position of each genre in the index.
    category_sum_by_genre = df.groupby('genre')[category_names].sum()
    proportion_by_genre = category_sum_by_genre.div(genre_counts, axis=0)
    category_direct_proportion = proportion_by_genre.loc['direct']
    category_news_proportion = proportion_by_genre.loc['news']
    category_social_proportion = proportion_by_genre.loc['social']

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Counts"
            },
            'xaxis': {
                'title': "Genres"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_proportions)],
        'layout': {
            'title': 'Proportion of Categories',
            'yaxis': {
                'title': "Message proportions"
            },
            'xaxis': {
                'title': "Category"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_direct_proportion)],
        'layout': {
            'title': 'Proportion of Categories in Direct Genre',
            'yaxis': {
                'title': "Message Proportions"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_news_proportion)],
        'layout': {
            'title': 'Proportion of Categories in News Genre',
            'yaxis': {
                'title': "Message Proportions"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_social_proportion)],
        'layout': {
            'title': 'Proportion of Categories in Social Genre',
            'yaxis': {
                'title': "Message Proportions"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with category totals and model scores.

    The first chart shows the column totals of ``df`` from position 4 onward,
    largest first.  The second chart plots three columns of the module-level
    ``df_rep`` report (its ``f1-score`` rows, scaled by 100 and rounded to
    one decimal) in that same category order.
    """
    # genre totals (computed as in the template this view is based on)
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # category totals, largest first
    data_per_category = df.iloc[:, 4:].sum().sort_values(ascending=False)
    category_names = data_per_category.index

    # Align df_rep with the sorted category order: four report rows per
    # category, keyed by (category, metric).
    metrics = ('precision', 'recall', 'f1-score', 'support')
    row_keys = [
        (category, metric)
        for category in data_per_category.index
        for metric in metrics
    ]
    df_rep_align = df_rep.reindex(row_keys)

    # keep only the f1-score rows, as percentages, first three columns
    f1_rows = df_rep_align.xs('f1-score', axis=0, level=1)
    f1_percent = f1_rows.mul(100).round(1).iloc[:, :3]
    df_rep_align = f1_percent.rename(columns={
        '0': 'f1-score: not this category',
        '1': 'f1-score: this category',
    })

    # shared x-axis description for both charts
    def category_axis():
        return {'title': "Category", 'automargin': True}

    totals_graph = {
        'data': [Bar(x=category_names, y=data_per_category)],
        'layout': {
            'title': 'Messages per Category',
            # NOTE(review): key is capitalised here; confirm whether
            # Plotly's lowercase 'autosize' was intended.
            'Autosize': False,
            'height': 600,
            'yaxis': {'title': "Count"},
            'xaxis': category_axis(),
        },
    }
    scores_graph = {
        'data': [
            Scatter(
                x=category_names,
                y=df_rep_align['accuracy'],
                name='Accuracy',
                type='scatter'
            ),
            Scatter(
                x=category_names,
                y=df_rep_align['f1-score: not this category'],
                name='f1-score: not this category'
            ),
            Scatter(
                x=category_names,
                y=df_rep_align['f1-score: this category'],
                name='f1-score: this category'
            ),
        ],
        'layout': {
            'title': 'Prediction Accuracy and F1-Score on a Test Dataset',
            'height': 600,
            'yaxis': {'title': "Percentage [%]"},
            'xaxis': category_axis(),
            'legend': {
                'xanchor': "auto",
                'yanchor': "auto",
                'y': 0.8,
            },
        },
    }
    graphs = [totals_graph, scores_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with label-count, category and genre charts.

    Three bar charts are built from the module-level ``df``: how many rows
    share each row-sum of the columns from position 2 onward (the most
    frequent row-sum is dropped), the column totals from position 3 onward,
    and the number of messages per genre.  The first two charts get a
    randomly shuffled colour list on every call.
    """
    # row-wise label totals and how often each total occurs
    rowSums = df.iloc[:, 2:].sum(axis=1)
    multiLabel_counts = rowSums.value_counts().iloc[1:]

    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    categories = list(df.columns[3:].values)

    colors = [
        'aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure',
        'beige', 'bisque', 'black', 'blanchedalmond', 'blue',
        'blueviolet', 'brown', 'burlywood', 'cadetblue', 'salmon',
        'darksalmon', 'lightcoral', 'indianred', 'crimson', 'firebrick',
        'coral', 'tomato', 'orangered', 'gold', 'orange',
        'lawngreen', 'chartreuse', 'limegreen', 'lime', 'forestgreen',
        'green', 'darkgreen', 'greenyellow', 'yellowgreen', 'springgreen',
    ]

    # two independent shuffles, drawn in the order the charts are listed
    multi_label_colors = random.sample(colors, len(colors))
    category_colors = random.sample(colors, len(colors))

    multi_label_graph = {
        'data': [
            Bar(
                x=multiLabel_counts.index,
                y=multiLabel_counts.values,
                marker=dict(color=multi_label_colors),
            )
        ],
        'layout': {
            'title': 'Comments having multiple labels',
            'yaxis': {'title': "Number of comments"},
            'xaxis': {'title': "Number of labels"},
        },
    }
    category_graph = {
        'data': [
            Bar(
                x=categories,
                y=df.iloc[:, 3:].sum().values,
                marker=dict(color=category_colors),
            )
        ],
        'layout': {
            'title': 'Comments in each category',
            'yaxis': {'title': "Number of comments"},
            'xaxis': {
                'title': "Comment Type",
                'tickangle': 50,
                'automargin': True,
            },
        },
    }
    genre_graph = {
        'data': [
            Bar(
                x=genre_names,
                y=genre_counts,
                marker=dict(color=['blueviolet', 'brown', 'blanchedalmond'])
            )
        ],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    graphs = [multi_label_graph, category_graph, genre_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with category totals and top category pairs.

    The first chart shows the column totals of ``df`` from position 4 onward.
    The second shows the (up to) 20 largest entries of the category
    co-occurrence matrix, ignoring the first two category columns.

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # Finding count of each category and storing their names
    category_frame = df[df.columns[4:]]
    cat_counts = category_frame.sum()
    cat_names = cat_counts.index

    # Co-occurrence matrix: entry (i, j) is the dot product of category
    # columns i and j.
    coocc = category_frame.T.dot(category_frame)
    pair_counts = coocc.to_numpy()

    # The first two categories (related and request) are skipped because
    # they occur too often and are very generic.
    skipped = 2
    cats_considered = df.columns[4 + skipped:]

    # The matrix is symmetric, so only the strictly lower triangle is kept:
    # every pair is counted once and the diagonal is excluded.  np.tril
    # returns a new array sized from the data, so nothing is mutated in
    # place and no category count is hard-coded.
    ncoocc = np.tril(pair_counts[skipped:, skipped:], k=-1)

    # Finding the top 20 values (fewer if the matrix has fewer cells)
    top_N = min(20, ncoocc.size)
    freq = []
    cats = []
    if top_N:
        idx = np.argpartition(ncoocc, ncoocc.size - top_N, axis=None)[-top_N:]
        rows, cols = np.unravel_index(idx, ncoocc.shape)
        for row, col in zip(rows, cols):
            freq.append(ncoocc[row, col])
            cats.append([cats_considered[row], cats_considered[col]])

    graphs = [{
        'data': [Bar(x=cat_names, y=cat_counts)],
        'layout': {
            'title': 'Distribution of Messages under various categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Category"
            }
        }
    }, {
        'data': [Bar(x=[cat[0] + "+" + cat[1] for cat in cats], y=freq)],
        'layout': {
            'title': 'Top 20 co-occurring categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'type': 'category',
                'title': "Category pair",
                'tickangle': 30
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with a genre pie chart and a grouped bar chart.

    The pie chart shows the number of messages per genre.  The bar chart
    shows, per genre, the totals of the ``search_and_rescue``,
    ``infrastructure_related`` and ``weather_related`` columns of ``df``.
    """
    # messages per genre, for the pie chart
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # per-genre totals of the three selected columns, for the bar chart
    selected_columns = ['genre', 'search_and_rescue',
                        'infrastructure_related', 'weather_related']
    disaster_categories = df[selected_columns].groupby('genre').sum()

    # (column, legend name, bar colour) for each bar series
    light, medium, dark = '#DBDBDB', '#808080', '#383838'
    series_specs = [
        ('search_and_rescue', 'Search & Rescue', light),
        ('infrastructure_related', 'Infrastructured', medium),
        ('weather_related', 'Weather', dark),
    ]
    related_bars = [
        Bar(
            x=list(disaster_categories.index),
            y=disaster_categories[column],
            name=legend_name,
            marker_color=[colour, colour, colour]
        )
        for column, legend_name, colour in series_specs
    ]

    genre_pie = Pie(
        labels=genre_names,
        values=genre_counts,
        marker={'colors': [light, medium, dark]},
        sort=False
    )

    graphs = [
        {
            'data': [genre_pie],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
        {
            'data': related_bars,
            'layout': {
                'title': 'Distribution of Message Related Types',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, category and top-10 charts.

    Three bar charts are built from the module-level ``df``: messages per
    genre, the total of every column left after dropping ``id``,
    ``message``, ``original`` and ``genre``, and the ten largest of those
    totals expressed as a share of their overall sum.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # totals of every remaining column
    temp = df.drop(['id', 'message', 'original', 'genre'], axis=1)
    cat_counts = temp.sum()
    cat_names = list(cat_counts.index)

    # share of the grand total, ten largest first
    shares = cat_counts / cat_counts.sum()
    cat_props = shares.sort_values(ascending=False).iloc[:10]
    cat_prop_names = list(cat_props.index)

    # (x values, y values, chart title, y-axis title, x-axis title)
    chart_specs = [
        (genre_names, genre_counts,
         'Distribution of Message Genres', "Count", "Genre"),
        (cat_names, cat_counts,
         'Distribution of Categories', "Count", "Category"),
        (cat_prop_names, cat_props,
         'Top 10 Categories and their Proportions', "Proportion", "Category"),
    ]
    graphs = [
        {
            'data': [Bar(x=x_values, y=y_values)],
            'layout': {
                'title': chart_title,
                'yaxis': {'title': y_title},
                'xaxis': {'title': x_title},
            },
        }
        for x_values, y_values, chart_title, y_title, x_title in chart_specs
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, category and top-category charts.

    Three bar charts are built from the module-level ``df``: messages per
    genre, the sum of each column from position 4 onward, and the column
    means ranked 2nd to 11th (the largest mean is skipped).
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # sum of every category column, in column order
    category_colnames = list(df.columns[4:])
    category_boolean = [np.sum(df[name]) for name in category_colnames]

    # column means, largest first, skipping the top entry
    categories = df.iloc[:, 4:]
    ranked_means = categories.mean().sort_values(ascending=False)
    categories_mean = ranked_means.iloc[1:11]
    categories_colnames = list(categories_mean.index)

    # plot 1 - genre
    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    # plot 2 - category
    category_graph = {
        'data': [Bar(x=category_colnames, y=category_boolean)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Count", 'automargin': True},
            'xaxis': {
                'title': "Category",
                'tickangle': 35,
                'automargin': True,
            },
        },
    }
    # plot 3 - Top 10 Message Categories
    top_graph = {
        'data': [Bar(x=categories_colnames, y=categories_mean)],
        'layout': {
            'title': 'Top 10 Message Categories',
            'yaxis': {'title': "Percentage"},
            'xaxis': {'title': "Categories"},
        },
    }
    graphs = [genre_graph, category_graph, top_graph]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, category and word-frequency charts.

    Four bar charts are built from the module-level ``df``: messages per
    genre, all category totals (largest first), the ten largest category
    totals, and the ten most frequent lower-cased whitespace-separated words
    of ``df['message']`` that are not in the English stop-word list.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # category totals, largest first (shared by charts two and three)
    category_counts = df.iloc[:, 4:].sum(axis=0).sort_values(ascending=False)

    # category frequencies prep
    labels = category_counts.reset_index()
    labels.columns = ['category', 'count']
    label_names = labels['category'].values.tolist()
    label_values = labels['count'].values.tolist()

    # category top 10 prep
    category_top = category_counts.head(10)
    category_names = list(category_top.index)

    # top words
    all_text = ' '.join(df['message']).lower()
    word_srs = pd.Series(all_text.split())
    is_stopword = word_srs.isin(stopwords.words("english"))
    top_words = word_srs[~is_stopword].value_counts().head(10)
    top_words_names = list(top_words.index)

    # (x values, y values, chart title, y-axis title, x-axis title)
    chart_specs = [
        (genre_names, genre_counts,
         'Distribution of Message Genres', "Count", "Genre"),
        (label_names, label_values,
         "Messages categories frequency", "Message Category Frequency",
         "Categories"),
        (category_names, category_top,
         'Top 10 Message Categories', "Count", "Category"),
        (top_words_names, top_words,
         'Most Frequent Words', "Count", "Words"),
    ]
    graphs = [
        {
            'data': [Bar(x=x_values, y=y_values)],
            'layout': {
                'title': chart_title,
                'yaxis': {'title': y_title},
                'xaxis': {'title': x_title},
            },
        }
        for x_values, y_values, chart_title, y_title, x_title in chart_specs
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, category-count and category-mean charts.

    Three bar charts are built from the module-level ``df``: messages per
    genre, the sum of each column from position 4 onward (in column order),
    and the mean of those columns sorted from largest to smallest.

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Category counts, in column order
    category = list(df.iloc[:, 4:])
    cat_count = [df[col].sum() for col in category]

    # Category means, largest first.  Sorting reorders the categories, so
    # the chart must take its labels from this series' own index; pairing it
    # with ``category`` (column order) would attach means to the wrong names.
    category_mean = df.iloc[:, 4:].mean().sort_values(ascending=False)
    category_mean_names = list(category_mean.index)

    # create visuals
    graphs = [
        {
            'data': [Bar(x=genre_names, y=genre_counts)],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            'data': [Bar(x=category, y=cat_count)],
            'layout': {
                'title': 'Messages Category Frequency',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Category"
                }
            }
        },
        {
            'data': [Bar(x=category_mean_names, y=category_mean)],
            'layout': {
                'title': 'Top Message Categories',
                'yaxis': {
                    'title': "Percentage"
                },
                'xaxis': {
                    'title': "Category"
                }
            }
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre and category overview charts.

    Four charts are built from the module-level ``df``: messages per genre
    (bar and pie), the number of messages for each count of flagged
    categories, and the total of every category column (largest first).

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Number of categories set on each message, then how many messages share
    # each number.  The row sums are kept in their own Series so that no
    # column is added to a slice of ``df``.
    # NOTE(review): assumes columns from position 4 onward are numeric
    # category flags -- confirm against how ``df`` is loaded.
    labels_per_message = df.iloc[:, 4:].sum(axis=1)
    counts = labels_per_message.value_counts().sort_index()
    names = list(counts.index)

    # another graph to show the numbers in each category
    categoriestwo = df.iloc[:, 4:].sum().sort_values(ascending=False)

    # create visuals
    graphs = [
        {
            'data': [
                Bar(
                    x=genre_names,
                    y=genre_counts
                )
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            # ``counts`` is already aggregated, so it is drawn directly
            # against the number of categories.  Passing it to a histogram
            # as ``y`` binned the count values themselves, which does not
            # match the axis titles below.
            'data': [
                Bar(
                    x=names,
                    y=counts,
                )
            ],
            'layout': {
                'title': 'Distribution of Messages in several categories',
                'yaxis': {
                    'title': "Number of messages"
                },
                'xaxis': {
                    'title': "Number of included categories"
                },
            }
        },
        {
            'data': [
                Pie(
                    labels=genre_names,
                    values=genre_counts
                )
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
            }
        },
        {
            'data': [goj.Bar(
                x=categoriestwo.index,
                y=categoriestwo,
                marker=dict(color='blue'),
                opacity=0.8
            )],
            'layout': goj.Layout(
                title="Messages per Category",
                xaxis=dict(
                    title='Categoriestwo',
                    tickangle=45
                ),
                yaxis=dict(
                    title='# of Messages',
                    tickfont=dict(
                        color='Black'
                    )
                )
            )
        }
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre and per-genre category charts.

    Four charts are built from the module-level ``df``: messages per genre
    (bar and pie), category totals split by the ``news``, ``direct`` and
    ``social`` genres (grouped bar), and the same split as a heatmap.

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Category names ordered by overall total, largest first.
    label_sums = df.iloc[:, 4:].sum().sort_values(ascending=False)
    label_names = list(label_sums.index)

    # Per-genre category totals.  They come out in column order, so each is
    # reordered to ``label_names`` before plotting; otherwise values would be
    # drawn under the wrong category label.
    genre_sums = {
        genre: df[df['genre'] == genre].iloc[:, 4:].sum().reindex(label_names)
        for genre in ('news', 'direct', 'social')
    }
    label_sums_news = genre_sums['news']
    label_sum_direct = genre_sums['direct']
    label_sum_social = genre_sums['social']

    # create visuals
    # Four visuals in the graph list
    graphs = [{
        'data': [
            Bar(x=genre_names, y=genre_counts),
        ],
        'layout': {
            'title': 'Count of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [
            Bar(x=label_names,
                y=label_sums_news,
                orientation='v',
                name='news'),
            Bar(x=label_names, y=label_sum_direct, name='direct'),
            Bar(x=label_names, y=label_sum_social, name='social')
        ],
        'layout': {
            'title': 'Distribution of Message Category',
            'yaxis': {
                'title': 'Count'
            },
            'xaxis': {
                'title': 'Category',
                'tickangle': -45,
            }
        }
    }, {
        'data': [Pie(values=genre_counts, labels=genre_names)],
        'layout': {
            'title': 'Distribution of Message Genre',
            'height': 400,
            'width': 500
        }
    }, {
        'data': [
            Heatmap(z=[label_sums_news, label_sum_social, label_sum_direct],
                    x=label_names,
                    y=['news', 'social', 'direct'],
                    colorscale=[[0, 'white'], [0.5, 'grey'], [1.0, 'red']])
        ],
        'layout': {
            'title': 'Heatmap',
            'xaxis': {
                'title': 'Category',
                'tickangle': -45
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, category and word-cloud charts.

    Builds three Plotly figures from the module-level ``df``: messages per
    genre, messages per category (largest first), and a text scatter of the
    100 most frequent non-numeric, non-stop-word tokens in ``df['message']``
    placed at random positions with font size growing with frequency.

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # Imported here so the stop-word set is available without loading a full
    # spaCy pipeline on every request (the loaded pipeline was never used).
    import numpy as np
    from spacy.lang.en.stop_words import STOP_WORDS

    # calculate number of messages by genre
    genre_counts = df.groupby('genre').count()['message']
    genre_names = [x.title() for x in genre_counts.index]

    # calculate number of messages by category
    category_counts = df[df.columns[4:]].sum().sort_values(ascending=False)
    category_names = [
        x.replace('_', ' ').title() for x in category_counts.index
    ]

    # calculate word cloud
    message_all = df['message']
    # scikit-learn documents ``stop_words`` as 'english', a list or None,
    # so the spaCy set is converted to a list.
    vectorizer = CountVectorizer(stop_words=list(STOP_WORDS),
                                 ngram_range=(1, 1))
    vect = vectorizer.fit_transform(message_all)

    # ``get_feature_names`` was replaced by ``get_feature_names_out`` in
    # newer scikit-learn; use whichever this installation provides.
    feature_names = (getattr(vectorizer, 'get_feature_names_out', None)
                     or vectorizer.get_feature_names)
    words = feature_names()

    # Sum each column on the sparse matrix; converting the full
    # document-term matrix to a dense array first is unnecessary.
    word_counts = np.asarray(vect.sum(axis=0)).ravel()
    word_counts_df = pd.Series(word_counts,
                               index=words).sort_values(ascending=False)

    # drop tokens that parse as numbers
    query = pd.to_numeric(word_counts_df.index, errors='coerce').isna()
    word_counts_df = word_counts_df[query]

    top = word_counts_df[:100]
    words = top.index
    colors = [
        plotly.colors.DEFAULT_PLOTLY_COLORS[random.randrange(1, 10)]
        for i in range(len(words))
    ]
    weights = top.values**0.8 / 9

    # create visuals
    graphs = [
        # Graph 1
        {
            'data': [
                Bar(x=genre_names,
                    y=genre_counts,
                    marker=dict(color='rgba(55, 128, 191, 0.7)',
                                line=dict(
                                    color='rgba(55, 128, 191, 1.0)',
                                    width=2,
                                )))
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        # Graph 2
        {
            'data': [
                Bar(x=category_names,
                    y=category_counts,
                    marker=dict(color='rgba(50, 171, 96, 0.7)',
                                line=dict(
                                    color='rgba(50, 171, 96, 1.0)',
                                    width=2,
                                )))
            ],
            'layout': {
                'title': 'Distribution of Message Categories',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Category"
                },
                'margin': {
                    'l': 100,
                    'r': 100,
                    't': 100,
                    'b': 200
                },
            }
        },
        # Graph 3
        {
            'data': [
                Scatter(x=random.sample(range(len(words)), k=len(words)),
                        y=random.sample(range(len(words)), k=len(words)),
                        mode='text',
                        text=words,
                        marker={'opacity': 0.3},
                        textfont={
                            'size': weights,
                            'color': colors
                        })
            ],
            'layout': {
                'xaxis': {
                    'showgrid': False,
                    'showticklabels': False,
                    'zeroline': False,
                    'range': [-len(words) * 0.1,
                              len(words) * 1.15]
                },
                'yaxis': {
                    'showgrid': False,
                    'showticklabels': False,
                    'zeroline': False,
                    'range': [-len(words) * 0.1,
                              len(words) * 1.15]
                },
                'title': '100 Most Frequent Words Appearing in Messages <br> Larger Text Represents Higher Frequency',
            }
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, message-length and category charts.

    Three bar charts are built from the module-level ``df``: messages per
    genre, mean message length (in characters) per genre, and the total of
    every column left after dropping ``id``, ``message`` and ``genre``
    (largest first).

    Returns:
        The value of ``render_template('master.html', ...)``.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Mean message length per genre.  Only the length series is averaged;
    # taking the mean of every column first and then selecting the length
    # would also try to average the text columns.
    message_lengths = df['message'].apply(len)
    genre_length_mean = message_lengths.groupby(df['genre']).mean()

    # Count the number of each category.  ``columns=`` is passed by keyword
    # because a positional axis argument is not accepted by newer pandas.
    category_counts = (df.drop(columns=['id', 'message', 'genre'])
                       .sum()
                       .sort_values(ascending=False))
    category_names = list(category_counts.index)

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Bar(x=genre_names, y=genre_length_mean)],
        'layout': {
            'title': 'Average Message length in each Genres',
            'yaxis': {
                'title': "Average length"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_counts)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Category"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """
    Function Description:
        Display the homepage together with its overview graphs

    Input:
        None

    Output:
        Rendered HTML
    """
    # messages per genre, largest genre first
    per_genre = df.groupby('genre').count()['message'].reset_index()
    genre_counts = per_genre.sort_values(by='message', ascending=False)

    def countvals(filtered_df):
        """Return a frame of column totals (from position 4 on), largest first."""
        tail_names = list(filtered_df.columns[4:])
        CatDF = filtered_df.loc[:, filtered_df.columns.isin(tail_names)]
        countdict = {
            'Catagory': list(CatDF.columns),
            'Value': [sum(CatDF[name].values) for name in CatDF.columns],
        }
        totals = pd.DataFrame.from_dict(countdict)
        return totals.sort_values(by='Value', ascending=False)

    def category_graph(frame, chart_title):
        """Describe one bar chart of category totals with the shared layout."""
        return {
            'data': [Bar(x=frame.Catagory, y=frame.Value)],
            'layout': {
                'title': chart_title,
                "height": 500,
                'yaxis': {'title': "Count"},
                'xaxis': {
                    'title': "<br><br><br> Catagory",
                    'tickangle': 45,
                },
                "margin": {"b": 150},
            },
        }

    # category totals overall, then the ten largest within each genre
    countdf = countvals(df)
    countdfnews = countvals(df.loc[df.genre == 'news'])[:10]
    countdfdirect = countvals(df.loc[df.genre == 'direct'])[:10]
    countdfsocial = countvals(df.loc[df.genre == 'social'])[:10]

    graphs = [
        {
            'data': [Bar(x=genre_counts.genre, y=genre_counts.message)],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
        category_graph(countdf, 'Distribution of Message Catagory'),
        category_graph(
            countdfnews,
            'Top 10 Distribution of Message Catagory Where Genre Is News'),
        category_graph(
            countdfdirect,
            'Top 10 Distribution of Message Catagory Where Genre Is Direct'),
        category_graph(
            countdfsocial,
            'Top 10 Distribution of Message Catagory Where Genre Is Social'),
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, category and token charts.

    Three bar charts are built: messages per genre, the column totals of
    ``df`` from position 3 onward (largest first), and the last ten entries
    of the sequence loaded from ``sorted_d.pkl``, each read as a
    (name, count) pair.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # most frequent categories first
    category_totals = df.iloc[:, 3:].sum().sort_values(ascending=False)
    category_names = list(category_totals.index)
    category_counts = list(category_totals.values)

    # last ten entries of the stored token list
    sorted_d = joblib.load("sorted_d.pkl")
    last_ten = sorted_d[-10:]
    token_names = [entry[0] for entry in last_ten]
    token_counts = [entry[1] for entry in last_ten]

    # (x values, y values, chart title, x-axis title)
    chart_specs = [
        (genre_names, genre_counts,
         'Distribution of Message Genres', "Genre"),
        (category_names, category_counts,
         'Distribution of Message Categories', "Category"),
        (token_names, token_counts,
         'Top 10 Tokens', "Tokens"),
    ]
    graphs = [
        {
            'data': [Bar(x=x_values, y=y_values)],
            'layout': {
                'title': chart_title,
                'yaxis': {'title': "Count"},
                'xaxis': {'title': x_title},
            },
        }
        for x_values, y_values, chart_title, x_title in chart_specs
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with class-frequency and common-word charts.

    Reads the module-level ``df`` and builds two Plotly bar charts:

    * each class column's sum expressed as a percentage of the row count;
    * the most frequent tokens of ``df['message']`` that are not English
      stopwords (at most ten; fewer when the vocabulary is smaller).

    Returns:
        The rendered ``master.html`` template, given the graph ids and the
        JSON-encoded graph definitions.
    """
    max_words = 10

    # Everything except the four descriptive columns is treated as a class.
    classes_count = df.drop(
        columns=['id', 'message', 'original', 'genre']).sum()

    # Lower-case, replace non-alphanumerics with spaces, split on whitespace.
    tokens = [
        token
        for text in df['message']
        for token in re.sub(r"[^a-zA-Z0-9]", " ", text.lower()).split()
    ]
    word_counts = pd.Series(tokens).value_counts()

    # Load the stopword list once; a set gives O(1) membership tests.
    english_stopwords = set(stopwords.words("english"))

    # Walk the frequency table from most to least common. Iterating instead
    # of indexing stops cleanly when fewer than max_words candidates exist.
    common_words = []
    word_count_values = []
    for word, count in word_counts.items():
        if word in english_stopwords:
            continue
        common_words.append(word)
        word_count_values.append(count)
        if len(common_words) == max_words:
            break

    graphs = [
        {
            'data': [
                Bar(
                    x=list(classes_count.index),
                    y=list(classes_count / len(df) * 100)
                )
            ],
            'layout': {
                'title': 'Occurance of classes in training set',
                'yaxis': {'title': "Percentage[%]"},
                'xaxis': {'title': "Class"},
                # Extra bottom margin leaves room for the class names.
                'margin': {'b': 160}
            }
        },
        {
            'data': [
                Bar(
                    x=common_words,
                    y=word_count_values
                )
            ],
            'layout': {
                'title': 'Most common words',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "word"}
            }
        }
    ]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with category and genre bar charts.

    Reads the module-level ``df`` and returns the rendered ``master.html``
    template.
    """
    # Columns from position 4 onward; count the non-zero cells in each.
    category_frame = df.iloc[:, 4:]
    category_labels = category_frame.columns
    nonzero_mask = category_frame != 0
    nonzero_totals = nonzero_mask.sum().values

    # Number of messages per genre.
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    category_figure = {
        'data': [Bar(x=category_labels, y=nonzero_totals)],
        'layout': {
            'title': 'Distribution of Catrgories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category", 'tickangle': 35},
        },
    }
    genre_figure = {
        'data': [Bar(x=genre_labels, y=per_genre)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    graphs = [category_figure, genre_figure]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre and category-total bar charts.

    Reads the module-level ``df`` and returns the rendered ``master.html``
    template.
    """
    # Genre data: number of messages per genre.
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # Category data: every column that is not one of the descriptive fields.
    descriptive = ['id', 'message', 'original', 'genre']
    category_labels = [
        name for name in df.columns if name not in descriptive
    ]
    category_frame = df[category_labels]
    category_totals = [
        category_frame[name].sum() for name in category_frame.columns
    ]

    # Genre graph.
    genre_figure = {
        'data': [Bar(x=genre_labels, y=per_genre)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    # Category graph.
    category_figure = {
        'data': [Bar(x=category_labels, y=category_totals)],
        'layout': {
            'title': 'Distribution of Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category"},
        },
    }
    graphs = [genre_figure, category_figure]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with need-group and genre bar charts.

    Reads the module-level ``df`` and returns the rendered ``master.html``
    template.
    """

    def bar_figure(x_values, y_values, heading, x_label):
        """Assemble one bar-chart definition with a "Count" y axis."""
        return {
            'data': [Bar(x=x_values, y=y_values)],
            'layout': {
                'title': heading,
                'yaxis': {'title': "Count"},
                'xaxis': {'title': x_label},
            },
        }

    # Number of messages per genre.
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # Rows flagged with either member of each pair of needs.
    wants_food_or_water = (df['water'] == 1) | (df['food'] == 1)
    wants_power_or_shelter = (df['electricity'] == 1) | (df['shelter'] == 1)
    supply_totals = [
        len(df[wants_food_or_water]),
        len(df[wants_power_or_shelter]),
    ]
    supply_labels = ['Food and Water', 'Electricity and Shelter']

    # Rows flagged for search-and-rescue, and rows flagged for medical help.
    wants_rescue = df['search_and_rescue'] == 1
    wants_medical = df['medical_help'] == 1
    emergency_totals = [len(df[wants_rescue]), len(df[wants_medical])]
    emergency_labels = ['Search and Rescue', ' Medical Help']

    graphs = [
        bar_figure(
            supply_labels, supply_totals,
            'People Needing Food and Water or Electricity and Shelter',
            "Category"),
        bar_figure(
            emergency_labels, emergency_totals,
            'People Needing Search and Rescue or Medical Help',
            "Category"),
        bar_figure(
            genre_labels, per_genre,
            'Distribution of Message Genres', "Genre"),
    ]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with genre, top-category and top-word charts.

    Reads the module-level ``df`` and builds three Plotly bar charts:

    * number of messages per genre;
    * the ten largest column totals among the columns from position 4 on;
    * the ten most frequent message tokens that are not English stopwords
      (fewer when the vocabulary is smaller).

    Returns:
        The rendered ``master.html`` template, given the graph ids and the
        JSON-encoded graph definitions.
    """
    top_n = 10

    # Number of messages per genre.
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Largest category totals.
    category_counts = df.iloc[:, 4:].sum().sort_values(
        ascending=False)[:top_n]
    category_names = list(category_counts.index)

    # Load the stopword list once; calling stopwords.words() for every token
    # repeats the lookup and a linear scan each time.
    english_stopwords = set(stopwords.words('english'))

    # Tally tokens message by message so no full token list is kept around.
    word_tally = Counter()
    for text in df['message'].values:
        normalized = re.sub(r'[^a-zA-Z0-9]', ' ', text.lower())
        word_tally.update(
            w for w in word_tokenize(normalized)
            if w not in english_stopwords
        )

    # most_common() returns (word, count) pairs, most frequent first, and
    # yields an empty list when there are no tokens at all.
    top_words = word_tally.most_common(top_n)
    word_names = [word for word, _ in top_words]
    word_counts = [count for _, count in top_words]

    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"}
        }
    }, {
        'data': [Bar(x=category_names, y=category_counts)],
        'layout': {
            'title': 'Distribution of Top 10 Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category"}
        }
    }, {
        'data': [Bar(x=word_names, y=word_counts)],
        'layout': {
            'title': 'Distribution of Top 10 Message Words',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Word"}
        }
    }]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def draw_graph(self, x_key, y_key, graph_title):
    """Plot two entries of ``self.data`` as a bar chart.

    Args:
        x_key: Key into ``self.data`` for the x values.
        y_key: Key into ``self.data`` for the y values.
        graph_title: Title passed to the chart layout.

    The figure is handed to ``offline.plot`` with ``self.filename`` as the
    output filename; nothing is returned.
    """
    bars = Bar(x=self.data[x_key], y=self.data[y_key])
    figure = {"data": [bars], "layout": Layout(title=graph_title)}
    offline.plot(figure, filename=self.filename)
def index():
    """Render the landing page with genre and medical_products charts.

    Reads the module-level ``df`` and returns the rendered ``master.html``
    template.
    """

    def message_counts_by(column):
        """Return the per-group message counts and the list of group labels."""
        counts = df.groupby(column).count()['message']
        return counts, list(counts.index)

    def bar_figure(x_values, y_values, heading, x_label):
        """Assemble one bar-chart definition with a "Count" y axis."""
        return {
            'data': [Bar(x=x_values, y=y_values)],
            'layout': {
                'title': heading,
                'yaxis': {'title': "Count"},
                'xaxis': {'title': x_label},
            },
        }

    medical_totals, medical_labels = message_counts_by('medical_products')
    genre_totals, genre_labels = message_counts_by('genre')

    graphs = [
        bar_figure(genre_labels, genre_totals,
                   'Distribution of Message Genre', "Genre"),
        bar_figure(medical_labels, medical_totals,
                   'Distribution of Message Medical_products',
                   "Medical_products"),
    ]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the COVID-19 dashboard: two bar charts, a heatmap and KPIs.

    Reads the module-level ``df`` and ``ETL/covid_hospital_data.csv`` and
    builds:

    * deaths per state and cases per state (bar charts);
    * a heatmap of the pairwise correlation between the numeric columns;
    * KPI values: total deaths, total cases, the ``state`` value of the row
      with the highest death count, and the sum of ``todayDeaths`` from the
      CSV file.

    Returns:
        The rendered ``master.html`` template, given the graph ids, the
        JSON-encoded graphs and the KPI values.
    """
    # Columns plotted directly.
    states = df['full_state']
    deaths = df['deaths']
    cases = df['cases']

    # Correlate numeric/bool columns only. pandas >= 2.0 no longer drops
    # non-numeric columns in corr() and raises on text instead; selecting
    # the dtypes explicitly behaves the same on old and new pandas.
    # NOTE(review): 'state' / 'full_state' look like text columns - confirm.
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    h_index = list(corr.index)
    h_values = corr.values

    # The CSV is re-read on every request.
    today_deaths = pd.read_csv(
        'ETL/covid_hospital_data.csv')['todayDeaths'].sum()

    # KPIs.
    total_deaths = deaths.sum()
    total_cases = cases.sum()
    # First row whose death count equals the maximum.
    worst_state = df[deaths == deaths.max()]['state'].iloc[0]

    graphs = [{
        'data': [Bar(x=states, y=deaths)],
        'layout': {
            'title': 'Mortes de Covid-19 por estado',
            'yaxis': {'title': "Mortes"},
            'xaxis': {'title': "Estado", 'tickangle': 70},
            # Extra bottom margin leaves room for the slanted state names.
            'margin': {'b': 200}
        }
    }, {
        'data': [Bar(x=states, y=cases)],
        'layout': {
            'title': 'Casos de Covid-19 por estado',
            'yaxis': {'title': "Casos"},
            'xaxis': {'title': "Estado", 'tickangle': 70},
            'margin': {'b': 200}
        }
    }, {
        'data': [Heatmap(x=h_index, y=h_index, z=h_values)],
        'layout': {
            'title': 'Mapa de calor de correlação entre as categorias',
            'font': {'size': 11},
            'yaxis': {'tickangle': 30},
            'xaxis': {'tickangle': 30},
            'margin': {'b': 200}
        }
    }]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html',
                           ids=ids,
                           graphJSON=graphJSON,
                           total_deaths=total_deaths,
                           total_cases=total_cases,
                           worst_state=worst_state,
                           today_deaths=today_deaths)
def index():
    """Render the landing page with genre and top/bottom category charts.

    Reads the module-level ``df`` and returns the rendered ``master.html``
    template.
    """

    def bar_figure(x_values, y_values, heading, x_label):
        """Assemble one bar-chart definition with a "Count" y axis."""
        return {
            'data': [Bar(x=x_values, y=y_values)],
            'layout': {
                'title': heading,
                'yaxis': {'title': "Count"},
                'xaxis': {'title': x_label},
            },
        }

    # Number of messages per genre.
    per_genre = df.groupby('genre').count()['message']
    genre_labels = list(per_genre.index)

    # Column totals from position 4 onward, largest first.
    ranked = df.iloc[:, 4:].sum().sort_values(ascending=False)

    # Ranks 2-6: the single largest category is skipped.
    # NOTE(review): the chart is titled "Top Five" - confirm the leading
    # category is meant to be left out.
    leading = ranked[1:6]
    leading_labels = list(leading.index)

    # The six smallest totals.
    # NOTE(review): the chart is titled "Last Five" but six are taken -
    # confirm whether this is intended.
    trailing = ranked[-6:]
    trailing_labels = list(trailing.index)

    graphs = [
        bar_figure(genre_labels, per_genre,
                   'Distribution of Message Genres', "Genre"),
        bar_figure(leading_labels, leading,
                   'Top Five Categories', "Categories"),
        bar_figure(trailing_labels, trailing,
                   'Last Five Categories', "Categories"),
    ]

    # One DOM id per figure, plus the figures serialised for plotly.js.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)