def histograms(days):
    """Build an overlaid histogram figure of overtime and minus hours.

    Args:
        days: Iterable of objects exposing ``overtime`` and ``minus_hours``
            attributes that can be compared with ``datetime.timedelta`` and
            provide ``total_seconds()``.

    Returns:
        dict: ``{"data": [...], "layout": ...}`` holding two ``Histogram``
        traces (hours of overtime, hours of minus time) and a ``Layout``.
    """
    zero = datetime.timedelta()

    def _hours_trace(hours, label):
        # Both traces share the same one-hour bins and transparency so they
        # can be drawn on top of each other.
        return Histogram(
            x=hours,
            name=label,
            xbins={"size": 1},
            opacity=0.75,
        )

    # Only days with strictly positive overtime contribute to this trace.
    overtime_hours = [
        entry.overtime.total_seconds() / 3600
        for entry in days
        if entry.overtime > zero
    ]
    # Only days with strictly negative minus hours contribute to this trace.
    negative_hours = [
        entry.minus_hours.total_seconds() / 3600
        for entry in days
        if entry.minus_hours < zero
    ]

    traces = [
        _hours_trace(overtime_hours, "overtime"),
        _hours_trace(negative_hours, "minus hours"),
    ]
    figure_layout = Layout(
        title="Overtime / Minus hours",
        xaxis={"title": "Overtime / Minus hours"},
        yaxis={"title": "Occurrences (in days)"},
        barmode="overlay",
    )
    return {"data": traces, "layout": figure_layout}
def _histogram(self, names, x):
    """Draw one histogram subplot per named data frame for column ``x``.

    All subplots share the same bin start, end and size, derived from the
    numeric values of the concatenated data frame, and the same x-axis range.

    Args:
        names: Keys into ``self._dataframes``; one subplot column is created
            per entry, in order.
        x: Column name to plot.
    """
    combined_frame = self._concatenated_data_frame
    combined_values = self._get_numeric_values(combined_frame, x)
    start = min(combined_values)
    end = max(combined_values)

    # Category columns get unit-width bins; anything else is split into ten
    # equal-width bins over the combined value range.
    if str(combined_frame[x].dtype) == 'category':
        size = 1
    else:
        size = (end - start) / 10.0

    subplot_count = len(names)
    fig = tools.make_subplots(rows=1, cols=subplot_count, print_grid=False)

    # Subplot columns are 1-based.
    for column_index, name in enumerate(names, start=1):
        frame = self._dataframes[name]
        values = self._get_numeric_values(frame, x)
        column = frame[x]
        if str(column.dtype) == 'category':
            text = column.cat.categories
        else:
            text = None

        trace = Histogram(
            name=name,
            x=values,
            xbins=dict(start=start, end=end, size=size),
            text=text,
        )
        fig.append_trace(trace, 1, column_index)

        axis_suffix = str(column_index)
        x_axis = fig.layout['xaxis' + axis_suffix]
        x_axis.title = x
        x_axis.range = [start, end]
        fig.layout['yaxis' + axis_suffix].title = 'count'

    # 500 px per subplot, capped at 1200 px overall.
    fig.layout.width = min(500 * subplot_count, 1200)
    fig.layout.height = 500
    iplot(fig)
def index():
    """Render the landing page with three Plotly charts built from ``df``.

    Charts:
        1. Bar chart of message counts per genre.
        2. Bar chart of the share of rows with a value > 0 for every column
           from position 4 onwards (used here as the category columns).
        3. Histogram of message lengths, measured in TextBlob tokens.

    Returns:
        The result of ``render_template('master.html', ...)`` receiving the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    category_frame = df[df.columns[4:]]
    cat_names = list(category_frame.columns)
    # Average down the rows (axis=0) so there is exactly one ratio per
    # category column, matching ``cat_names``. Averaging with axis=1 would
    # produce one value per message instead and mis-pair the bars.
    pos_ratios = list((category_frame > 0).mean(axis=0))

    message_lengths = df.message.apply(
        lambda text: len(TextBlob(text).tokens)).values

    # create visuals
    graphs = [
        {
            'data': [Bar(x=genre_names, y=genre_counts)],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
        {
            'data': [Bar(x=cat_names, y=pos_ratios)],
            'layout': {
                'title': 'Distribution of Non-Zero labels in Each Category',
                'yaxis': {'title': "Ratio of Positive Instances"},
                'xaxis': {'title': "Category Name"},
            },
        },
        {
            'data': [
                Histogram(
                    x=message_lengths,
                    # NOTE(review): lengths are whole token counts, so a bin
                    # width of 0.8 leaves some bins without any integer in
                    # them - confirm this width is intended.
                    xbins=dict(
                        start=np.min(message_lengths),
                        size=0.8,
                        end=np.max(message_lengths),
                    ),
                )
            ],
            'layout': {
                'title': 'Distribution of Message Length',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Message Length"},
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with three Plotly charts built from ``df``.

    Charts:
        1. Bar chart of message counts per genre.
        2. Histogram of the ``length_under_600`` column of ``df``.
        3. Bar chart of the number of non-zero entries in every column from
           position 4 onwards (used here as the category columns).

    Returns:
        The result of ``render_template('master.html', ...)`` receiving the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # The previously computed correlation matrix was never used by any chart,
    # so it is no longer calculated on every request.
    category_frame = df.iloc[:, 4:]
    category_names = category_frame.columns
    category_boolean = (category_frame != 0).sum().values

    # create visuals
    graphs = [
        {
            'data': [Bar(x=genre_names, y=genre_counts)],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
        {
            'data': [Histogram(x=df.length_under_600)],
            'layout': {
                'title': 'Histogram of Messages Length below 600 character',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Length of Message"},
            },
        },
        {
            'data': [Bar(x=category_names, y=category_boolean)],
            'layout': {
                'title': 'Distribution of Message Categories',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Category", 'tickangle': 35},
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with two Plotly charts built from ``df``.

    Charts:
        1. Bar chart of how often the value ``1`` occurs in every column from
           position 4 onwards, sorted in descending order and coloured by
           cycling through Plotly's default colour list.
        2. Histogram of the per-row sum over those same columns.

    Returns:
        The result of ``render_template('master.html', ...)`` receiving the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    label_frame = df.iloc[:, 4:]
    counts = label_frame.apply(lambda column: column.value_counts()).T
    counts = counts.sort_values(by=1, ascending=False)
    categories = pd.Series(counts.index).str.replace('_', ' ').str.title()
    row_sums = label_frame.sum(axis='columns')

    # create visuals
    # Wrap around the palette when there are more bars than colours.
    palette = plotly.colors.DEFAULT_PLOTLY_COLORS
    palette_size = len(palette)
    colors = [palette[position % palette_size]
              for position in range(len(counts))]

    category_chart = {
        'data': [Bar(x=categories, y=counts[1], marker=dict(color=colors))],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category", 'tickangle': -90},
            'margin': {'b': 160},
        },
    }
    multi_label_chart = {
        'data': [Histogram(x=row_sums)],
        'layout': {
            'title': 'Distribution of Counts of Multiple Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Number of categories in message"},
        },
    }
    graphs = [category_chart, multi_label_chart]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with three Plotly charts built from ``df``.

    Charts:
        1. Bar chart of the ten largest column sums among the columns from
           position 4 onwards.
        2. Histogram of words per message (whitespace split), keeping only
           lengths not more than three standard deviations from the mean.
        3. Bar chart of message counts per genre.

    Returns:
        The result of ``render_template('master.html', ...)`` receiving the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    column_totals = df[df.columns[4:]].sum()
    cat_frequency = column_totals.sort_values(ascending=False)[:10]
    cats = cat_frequency.index

    msg_len = df.message.apply(lambda message: len(message.split()))
    distance_from_mean = (msg_len - msg_len.mean()).abs()
    is_outlier = distance_from_mean > 3 * msg_len.std()
    msg_len = msg_len[~is_outlier]

    # create visuals
    bar_style = dict(color='#031c57')
    top_categories_chart = {
        'data': [Bar(x=cats, y=cat_frequency, marker=dict(bar_style))],
        'layout': {
            'title': 'Top 10 Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category"},
        },
    }
    length_chart = {
        'data': [Histogram(x=msg_len, marker=dict(bar_style))],
        'layout': {
            'title': 'Distribution of Message Lengths',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Words/Message"},
        },
    }
    genre_chart = {
        'data': [Bar(x=genre_names, y=genre_counts, marker=dict(bar_style))],
        'layout': {
            'title': 'Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    graphs = [top_categories_chart, length_chart, genre_chart]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with three Plotly charts built from ``df``.

    Charts:
        1. Bar chart of how many messages carry a given number of category
           labels (per-row sum over the columns from position 4 onwards).
        2. Bar chart of the column sum of each of those columns.
        3. Pie chart of message counts per genre.

    Returns:
        The result of ``render_template('master.html', ...)`` receiving the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    categories = df.iloc[:, 4:]
    cat_names = list(categories)
    cat_counts = [df[cat_name].sum() for cat_name in cat_names]

    # Keep the per-message totals in a local Series instead of writing a
    # 'sum' column into a slice of the shared data frame.
    labels_per_message = categories.sum(axis=1)
    counts = labels_per_message.value_counts().sort_index()
    names = list(counts.index)

    # create visuals
    # custom plotly graphs
    graphs = [
        {
            # ``counts`` is already aggregated, so it is drawn as a bar chart;
            # passing it to a histogram would bin the counts a second time.
            'data': [Bar(x=names, y=counts, marker=dict(color='#4CB391'))],
            'layout': {
                'title': 'Distribution of Messages in several categories',
                'yaxis': {'title': "Number of messages"},
                'xaxis': {'title': "Number of included categories"},
            },
        },
        {
            'data': [Bar(x=cat_names, y=cat_counts,
                         marker=dict(color='#4CB391'))],
            'layout': {
                'title': 'Distribution of Message Categories',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Category"},
            },
        },
        {
            'data': [Pie(labels=genre_names, values=genre_counts)],
            'layout': {
                'title': 'Distribution of Message Genres',
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render the landing page with three Plotly charts built from ``df``.

    Charts:
        1. Bar chart of message counts per genre.
        2. Bar chart of each category column's sum divided by the number of
           rows (columns from position 4 onwards), sorted descending.
        3. Histogram of the per-row sum over those same columns.

    The shared ``df`` is only read, never modified, so repeated requests
    always see the same set of category columns.

    Returns:
        The result of ``render_template('master.html', ...)`` receiving the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    category_frame = df[df.columns[4:]]

    # proportion of categories
    category_counts = category_frame.sum() / len(df)
    category_counts = category_counts.sort_values(ascending=False)
    category_names = list(category_counts.index)

    # Held in a local Series on purpose: storing this as a new column of
    # ``df`` would make it show up among ``df.columns[4:]`` on the next
    # request and distort both the category chart and these sums.
    categories_per_message = category_frame.sum(axis=1)

    # create visuals
    graphs = [
        {
            'data': [Bar(x=genre_names, y=genre_counts)],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {'title': "Count"},
                'xaxis': {'title': "Genre"},
            },
        },
        {
            'data': [Bar(x=category_names, y=category_counts)],
            'layout': {
                'title': 'Distribution of Categories',
                'yaxis': {'title': "Proportion"},
                'xaxis': {'title': "Categories"},
            },
        },
        {
            'data': [Histogram(x=categories_per_message)],
            'layout': {
                'title': 'Number of Categories per message',
                'yaxis': {'title': "Number of messages"},
                'xaxis': {'title': "Categories"},
            },
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def showPlotlyhistogram(self):
    """Plot a histogram of ``fd.data[fd.logcolumn]`` with Plotly offline.

    The bins start and end at ``fd.fmin`` and ``fd.fmax`` (both converted
    with ``int()``) and are ``fd.binwidth`` wide. The figure title ends with
    ``self.cellid``.
    """
    # Plotly Offline
    values = fd.data[fd.logcolumn]
    bin_spec = dict(start=int(fd.fmin), end=int(fd.fmax), size=fd.binwidth)
    trace = Histogram(x=values, xbins=bin_spec)
    figure_layout = Layout(title="Log10(D) histogram for " + self.cellid)
    offline.plot({"data": [trace], "layout": figure_layout})