def histograms(days):
    """Build an overlaid histogram figure of overtime and minus hours.

    Args:
        days: iterable of objects exposing ``overtime`` and ``minus_hours``
            timedelta attributes.  It is traversed twice, once per trace.

    Returns:
        dict with ``data`` (two ``Histogram`` traces whose values are
        expressed in hours) and ``layout`` (a ``Layout`` in overlay mode).
    """
    no_time = datetime.timedelta()

    # Only days with strictly positive overtime contribute to the first trace.
    overtime_hours = []
    for entry in days:
        if entry.overtime > no_time:
            overtime_hours.append(entry.overtime.total_seconds() / 3600)

    # Only days with strictly negative minus hours contribute to the second.
    minus_hours = []
    for entry in days:
        if entry.minus_hours < no_time:
            minus_hours.append(entry.minus_hours.total_seconds() / 3600)

    overtime_trace = Histogram(
        x=overtime_hours,
        name="overtime",
        xbins={"size": 1},
        opacity=0.75,
    )
    minus_trace = Histogram(
        x=minus_hours,
        name="minus hours",
        xbins={"size": 1},
        opacity=0.75,
    )

    figure_layout = Layout(
        title="Overtime / Minus hours",
        xaxis={"title": "Overtime / Minus hours"},
        yaxis={"title": "Occurrences (in days)"},
        barmode="overlay",
    )
    return {"data": [overtime_trace, minus_trace], "layout": figure_layout}
def tab1_chart3(data):
    """Build the overlaid "pure time vs. wait time" histogram figure.

    Args:
        data: mapping with the keys ``'pure time'`` and ``'waits'``, each
            holding a non-empty sequence of numbers.

    Returns:
        A ``Figure`` with one histogram trace per series; both traces share
        the same bins, which start at 0 and end at the overall maximum.

    Raises:
        PreventUpdate: when ``data`` is empty or ``None``.
    """
    print('tab1 chart3')
    if not data:
        raise PreventUpdate

    pure_times = data['pure time']
    wait_times = data['waits']

    # Both traces use the same upper edge, so compute it once.
    upper = np.max([np.max(pure_times), np.max(wait_times)])

    # Aim for 30 bins of integer width.  int() truncates to 0 whenever the
    # largest value is below 30, and a zero-width bin cannot be drawn, so
    # fall back to the fractional width (or 1 when there is no positive range).
    bin_size = int(upper / 30)
    if bin_size < 1:
        bin_size = upper / 30 if upper > 0 else 1

    trace1 = Histogram(
        x=pure_times,
        opacity=0.75,
        name='pure time',
        xbins=dict(start=0, end=upper, size=bin_size),
        marker=dict(color='#92b4f2'))
    trace2 = Histogram(
        x=wait_times,
        opacity=0.75,
        name='wait time',
        xbins=dict(start=0, end=upper, size=bin_size),
        marker=dict(color='#ffb653'))

    layout = dict(barmode='overlay',
                  title='Pure time vs. wait time per link histogram',
                  showlegend=False,
                  xaxis=dict(title='Time [s]'),
                  yaxis=dict(title='Count'),
                  margin=dict(l=50, r=50, t=80, b=50))
    return Figure(data=[trace1, trace2], layout=layout)
def index():
    """Render ``master.html`` with genre, message-length and categorization charts.

    Reads the module-level ``df`` without modifying it.  The columns from
    position 4 onward are summed per row to get the number of
    categorizations of each message.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Extract number of categorizations per message
    categorization_counts = df.iloc[:, 4:].sum(axis=1)

    # Extract message character length and truncate outliers.  The lengths
    # are kept in a local Series: storing them as a new column of the shared
    # ``df`` would make the ``iloc[:, 4:]`` sum above include that column on
    # every later call.
    message_lengths = df.message.apply(len)
    message_chars_cutoff = message_lengths.quantile(0.99)
    message_char_lengths = message_lengths[
        message_lengths <= message_chars_cutoff]

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Histogram(x=message_char_lengths.values, nbinsx=15)],
        'layout': {
            'title': 'Distribution of Message Character Count',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Character Count"
            }
        }
    }, {
        'data': [Histogram(x=categorization_counts.values, nbinsx=15)],
        'layout': {
            'title': 'Distribution of Message Categorization Count',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Categorization Count"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category-correlation and text-length charts.

    Reads the module-level ``df``.  The columns from position 4 onward are
    correlated with each other for the heatmap, and a ``text_length`` column
    is read for the per-genre histograms.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract genre counts
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # extract categories
    category_frame = df.iloc[:, 4:]
    category_map = category_frame.corr().values
    category_names = list(category_frame.columns)

    # extract length of texts
    length_direct = df.loc[df.genre == 'direct', 'text_length']
    length_social = df.loc[df.genre == 'social', 'text_length']
    length_news = df.loc[df.genre == 'news', 'text_length']

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [
            Heatmap(
                x=category_names,
                # The y labels are listed in reverse order, so the rows of z
                # are reversed as well; otherwise each row of correlations
                # would be shown next to another category's label.
                y=category_names[::-1],
                z=category_map[::-1])
        ],
        'layout': {
            'title': 'Heatmap of Categories'
        }
    }, {
        # The lengths go on x so that the data matches the axis titles below
        # (x: text length, y: count).
        'data': [
            Histogram(x=length_direct, name='Direct', opacity=0.5),
            Histogram(x=length_social, name='Social', opacity=0.5),
            Histogram(x=length_news, name='News', opacity=0.5)
        ],
        'layout': {
            'title': 'Distribution of Text Length',
            'yaxis': {
                'title': 'Count'
            },
            'xaxis': {
                'title': 'Text Length'
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def histogram_categories_per_message(df):
    """Build a probability-normalised histogram of per-row category totals.

    Args:
        df (dataframe): frame whose columns from position 3 onward are
            summed row by row.

    Returns:
        dict: ``{"data": [trace], "layout": layout}`` describing the plot.
    """
    # One total per row, taken over every column from position 3 onward.
    per_row_totals = df.iloc[:, 3:].sum(axis=1).tolist()

    bar_outline = dict(color="rgb(8,48,107)", width=1.5)
    bar_style = dict(color="rgb(158,202,225)", line=bar_outline)
    trace = Histogram(x=per_row_totals, histnorm="probability", marker=bar_style)

    y_axis = {"title": "Relative Frequency"}
    x_axis = {"title": "Number of Categories Per Message"}
    return {
        "data": [trace],
        "layout": {
            "title": "Distribution of Categories Per Message",
            "yaxis": y_axis,
            "xaxis": x_axis,
        },
    }
def index():
    """Render ``master.html`` with genre, per-category ratio and length charts.

    Reads the module-level ``df``.  The columns from position 4 onward are
    treated as label columns; message length is the number of ``TextBlob``
    tokens of each message.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Share of rows with a non-zero label, one value per category.  The mean
    # is taken down each column (axis=0) so that the result lines up with
    # ``cat_names``; axis=1 would produce one value per message instead.
    label_frame = df[df.columns[4:]]
    pos_ratios = list((label_frame > 0).mean(axis=0))
    cat_names = list(label_frame.columns)

    message_lengths = df.message.apply(
        lambda text: len(TextBlob(text).tokens)).values

    # create visuals
    graphs = [{
        'data': [Bar(
            x=genre_names,
            y=genre_counts,
        )],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Bar(x=cat_names, y=pos_ratios)],
        'layout': {
            'title': 'Distribution of Non-Zero labels in Each Category',
            'yaxis': {
                'title': "Ratio of Positive Instances"
            },
            'xaxis': {
                'title': "Category Name"
            }
        }
    }, {
        # NOTE(review): the lengths are integers, so a bin width of 0.8
        # leaves periodic empty bins; confirm whether a width of 1 was meant.
        'data': [
            Histogram(x=message_lengths,
                      xbins=dict(start=np.min(message_lengths),
                                 size=0.8,
                                 end=np.max(message_lengths)))
        ],
        'layout': {
            'title': 'Distribution of Message Length',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Message Length"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def date_histogram(input_df, dateCol, histTitle):
    """
    Create a plotly figure with a histogram of the dates in one column

    Input:
        input_df: dataset as a pandas dataframe
        dateCol: name of the column holding the dates; its values are
                 converted with ``pd.to_datetime``
        histTitle: title shown above the histogram
    Output: plotly figure object containing the visualization
    """
    # Convert the date column from object to datetime
    dates = pd.to_datetime(input_df[dateCol])

    # The number of bins is the whole number of days between the earliest and
    # the latest date.  With no valid date the span is NaT; nbinsx=0 then
    # leaves the choice of bins to plotly instead of failing in int().
    span = dates.max() - dates.min()
    binNumber = 0 if pd.isna(span) else int(span.days)

    data = [Histogram(x=dates, nbinsx=binNumber)]

    layout = Layout(title=histTitle,
                    xaxis=dict(
                        title='Year-Month-Day',
                        tickangle=20
                    ),
                    yaxis=dict(
                        title='Patent Count',
                        automargin=True,
                    )
                    )

    return Figure(data=data, layout=layout)
def _histogram(self, names, x):
    """Show one histogram subplot per named data frame for column ``x``.

    All subplots share the same bins, derived from the numeric values of
    ``x`` in the concatenated data frame: a width of 1 for a categorical
    column, otherwise a tenth of the value range.

    Args:
        names: keys into ``self._dataframes``; one subplot is drawn per name.
        x: name of the column to plot.
    """
    pooled_values = self._get_numeric_values(self._concatenated_data_frame, x)
    start = min(pooled_values)
    end = max(pooled_values)

    if str(self._concatenated_data_frame[x].dtype) == 'category':
        size = 1
    else:
        size = (end - start) / 10.0

    fig = tools.make_subplots(rows=1, cols=len(names), print_grid=False)

    for column_number, name in enumerate(names, start=1):
        frame = self._dataframes[name]
        values = self._get_numeric_values(frame, x)

        # Category labels are attached as trace text for categorical columns.
        if str(frame[x].dtype) == 'category':
            labels = frame[x].cat.categories
        else:
            labels = None

        trace = Histogram(
            name=name,
            x=values,
            xbins=dict(start=start, end=end, size=size),
            text=labels,
        )
        fig.append_trace(trace, 1, column_number)

        axis_suffix = str(column_number)
        fig.layout['xaxis' + axis_suffix].title = x
        fig.layout['xaxis' + axis_suffix].range = [start, end]
        fig.layout['yaxis' + axis_suffix].title = 'count'

    # 500 px per subplot, capped at 1200 px overall.
    fig.layout.width = min(500 * len(names), 1200)
    fig.layout.height = 500
    iplot(fig)
def graph4():
    """Render ``graph4.html`` with a histogram of message character lengths.

    Reads the ``message`` column of the module-level ``df``.

    Returns:
        The result of ``render_template('graph4.html', ...)`` called with the
        graph ids and the JSON-encoded graph.
    """
    message_lengths = df.message.str.len()

    # Bins of width 5 between 0 and 600.
    length_bins = dict(start=0.0, end=600, size=5)
    length_graph = {
        'data': [Histogram(x=message_lengths, xbins=length_bins)],
        'layout': {
            'title': 'Histogram',
            'yaxis': {'title': "# of Messages"},
            'xaxis': {'title': "Message length"},
        },
    }
    graphs = [length_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('graph4.html', ids=ids, graphJSON=graphJSON)
def col_histograms(df):
    """
    Generate one histogram ``Graph`` component per column of the dataframe.

    Each graph is 500x500, rendered as a static plot and given the
    ``col-sm-4`` class name.

    :param df: pandas.DataFrame
    :return: a ``Div`` whose children are the ``Graph`` components, in
        column order
    """
    graphs = []
    for col in df.columns:
        trace = Histogram(x=df[col], name=col)
        layout = Layout(title='Histogram for column {}'.format(trace.name),
                        autosize=False,
                        width=500,
                        height=500,
                        margin={
                            'l': 50,
                            'r': 50,
                            'b': 100,
                            't': 100,
                            'pad': 4
                        })
        fig = Figure(data=[trace], layout=layout)
        graphs.append(
            Graph(figure=fig,
                  className="col-sm-4",
                  config={'staticPlot': True, 'responsive': True}))

    return Div(children=graphs)
def calc_error_Kiewitt(GRSA, rhoAA, CapacityA, refx, refy, nm=4, runNb=30, Cmin=3,
                       Cmax=20, detailOn=True):
    """Compare predicted with given capacities and histogram the errors.

    For each of the first ``runNb`` entries whose ``gN`` is 6, 7, 8 or 9 and
    whose ``span / Lav`` exceeds 8, a capacity is predicted from the
    reference curve and the percentage error against ``CapacityA[i]`` is
    computed.  Only errors whose given capacity lies in ``[Cmin, Cmax]``
    enter the statistics and the histogram.  The maximum, minimum and
    average error and the number of accepted values are always printed.

    Args:
        GRSA: indexable collection of objects providing ``gN``, ``gR``,
            ``span``, ``height``, ``Lav``, ``secT``, ``nbnBns`` and
            ``PlanArea``.
        rhoAA: per-entry values looked up on the reference curve.
        CapacityA: array indexed as ``CapacityA[i, j]``.
        refx, refy: reference curve handed to ``np.interp``.
        nm: number of values evaluated per entry.
        runNb: number of entries to process.
        Cmin, Cmax: inclusive bounds a given capacity must satisfy.
        detailOn: when true, print one table row per processed entry.

    Returns:
        A ``Figure`` with the histogram of the accepted errors.
    """
    errMax = 0
    errMin = 0
    errAv = 0
    count = 0
    errV = []

    # Correction factor per gN; every other admitted gN uses 1.
    div_by_gN = {9: 0.95, 6: 1.15, 7: 1.08}

    if detailOn:
        print(' Nb - L nxr LpH Lav - L/Lav: error')
        print(' [-] [m] [-] [-] [m] [-] sec1 sec2 sec3 sec4')
        print('-----------------------------------------------------------')

    for i in range(runNb):
        GRS = GRSA[i]
        if GRS.gN in {6, 7, 8, 9} and GRS.span / GRS.Lav > 8:
            div = div_by_gN.get(GRS.gN, 1)
            pred = np.interp(rhoAA[i], refx, refy)
            cc = (GRS.span / GRS.height - 10.3) ** 2 / 26 + 1
            predCapacity = pred * GRS.secT * GRS.Lav * cc * 1000 * GRS.nbnBns / GRS.PlanArea * div
            err = (CapacityA[i] - predCapacity) / CapacityA[i] * 100

            if detailOn:
                print('{:4d} - {:d} {:d}x{:d} {:2.0f} {:4.1f} - {:5.1f}: '.format(
                    i, GRS.span, GRS.gN, GRS.gR, GRS.span / GRS.height,
                    GRS.Lav, GRS.span / GRS.Lav), end="", flush=True)

            for j in range(nm):
                accepted = Cmin <= CapacityA[i, j] <= Cmax
                if accepted:
                    count += 1
                    if err[j] > errMax:
                        errMax = err[j]
                    if err[j] < errMin:
                        errMin = err[j]
                    errAv = errAv + err[j]
                    errV.append(err[j])
                if detailOn:
                    cell = '{:5.1f}'.format(err[j])
                    if not accepted:
                        # Values outside [Cmin, Cmax] are shown highlighted.
                        cell = bcolors.WARNING + cell + bcolors.ENDC
                    print(cell, end="", flush=True)

            if detailOn:
                print('')

    print('Max error: {:4.1f}%'.format(errMax))
    print('Min error: {:4.1f}%'.format(errMin))  # overestimated
    # Without any accepted value there is no average; report NaN rather than
    # dividing by zero.
    average = errAv / count if count else float('nan')
    print('Average error: {:4.1f}%'.format(average))
    print(count)

    data = [Histogram(x=errV, xbins=dict(start=-59, end=59, size=2))]
    layout = Layout(bargap=0.05,
                    xaxis1=dict(range=(-60, 60), tick0=-60., dtick=2., title='Felirat'),
                    )
    fig = Figure(data=data, layout=layout)
    return fig
def index():
    """Render ``master.html`` with genre, message-length and category charts.

    Reads the module-level ``df``.  The columns from position 4 onward are
    counted per category (number of non-zero entries), and the
    ``length_under_600`` column is plotted as a histogram.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Number of non-zero entries per category column.  (The correlation
    # matrix that used to be computed here was never used.)
    category_frame = df.iloc[:, 4:]
    category_names = list(category_frame.columns)
    category_boolean = (category_frame != 0).sum().values

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Histogram(x=df.length_under_600, )],
        'layout': {
            'title': 'Histogram of Messages Length below 600 character',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Length of Message"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_boolean)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Category",
                'tickangle': 35
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category and message-length charts.

    Reads the module-level ``df``; the columns from position 4 onward are
    counted per category (number of non-zero entries).

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    category_frame = df.iloc[:, 4:]
    category_names = list(category_frame.columns)
    category_boolean = (df.iloc[:, 4:] != 0).sum().values

    # Messages of 1000 characters or more are left out for graph clarity.
    message_lengths = df.message.apply(len)
    message_lengths_reduced = list(
        filter(lambda length: length < 1000, message_lengths))

    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    category_graph = {
        'data': [Bar(y=category_names, x=category_boolean, orientation='h')],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Category", 'tickangle': 35},
            'xaxis': {'title': "Count"},
        },
    }
    length_graph = {
        'data': [Histogram(x=message_lengths_reduced, name='Message Length')],
        'layout': {
            'title': 'Distribution of Message Length',
            'yaxis': {'title': 'Count'},
            'xaxis': {'title': 'Message Length'},
        },
    }
    graphs = [genre_graph, category_graph, length_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, top-word and message-length charts.

    Reads the module-level ``df`` and tokenizes every message with
    ``tokenize`` to find the ten most frequent tokens longer than two
    characters.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Character lengths, restricted to messages shorter than 600 characters.
    all_lengths = df.message.str.len()
    message_lengths = all_lengths[all_lengths < 600]

    # Frequency of every token in the corpus.
    list_words = [token for text in df.message for token in tokenize(text)]
    word_frequencies = pd.Series(list_words).value_counts()
    longer_than_two = word_frequencies.index.str.len() > 2
    top_ten = word_frequencies[longer_than_two].iloc[:10]
    top_words_values = top_ten.values
    top_words = top_ten.index

    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    words_graph = {
        'data': [Bar(x=top_words, y=top_words_values)],
        'layout': {
            'title': 'Top words in corpus',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Words"},
        },
    }
    length_graph = {
        'data': [Histogram(x=message_lengths, nbinsx=50)],
        'layout': {
            'title': 'Distribution of Message length',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Number of Characters"},
        },
    }
    graphs = [genre_graph, words_graph, length_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category and message-length charts.

    Reads the module-level ``df``.  Every column other than ``id``,
    ``message``, ``original`` and ``genre`` is summed as a category, and the
    message length is the number of tokens returned by ``tokenize``.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    non_category_columns = ['id', 'message', 'original', 'genre']
    cat_counts = df.drop(labels=non_category_columns, axis=1).sum()
    cat_names = list(cat_counts.index)

    message_lengths = df.message.apply(lambda text: len(tokenize(text)))

    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres - old graph',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    category_graph = {
        'data': [Bar(x=cat_names, y=cat_counts)],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category"},
        },
    }
    length_graph = {
        'data': [Histogram(x=message_lengths)],
        'layout': {
            'title': 'Distribution of Message lengths',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Message length"},
        },
    }
    graphs = [genre_graph, category_graph, length_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category and word-count charts.

    Reads the module-level ``df`` without modifying it.  Every column other
    than ``id``, ``message``, ``original`` and ``genre`` is summed as a
    category.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    categories = df.drop(['id', 'message', 'original', 'genre'], axis=1)
    category_counts = categories.sum().values
    category_names = list(categories.columns.values)

    # Words per message, kept in a local Series: storing it as a new column
    # of the shared ``df`` would make ``categories`` above pick it up as an
    # extra category on every later call.
    word_counts = df['message'].apply(lambda x: len(x.split()))

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_counts)],
        'layout': {
            'title': 'Distribution of Categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Category"
            }
        }
    }, {
        # Only messages with fewer than 100 words are plotted.
        'data': [Histogram(x=word_counts[word_counts < 100])],
        'layout': {
            'title': 'Histogram of Message Word Count',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Message Word Count"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, message-length and average-length charts.

    Reads the module-level ``df`` without modifying it.  Message length is
    the number of whitespace-separated words of each message.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # Count of Messages by Genre
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Determine Message Length in Words.  The lengths stay in a local Series
    # instead of being written into the shared ``df``.
    language_genre_counts = df['message'].str.split().str.len()

    # Average length per genre.  Only the length Series is averaged: taking
    # the mean of the whole grouped frame would also try to average the text
    # columns, which pandas 2 rejects with a TypeError.
    language_genre_sums = language_genre_counts.groupby(df['genre']).mean()

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Histogram(x=language_genre_counts)],
        'layout': {
            'title': 'Message Lengths',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Length (# of Words)",
                'range': [0, 200],
                'tick0': 0,
                'dtick': 25
            }
        }
    }, {
        'data': [Bar(x=genre_names, y=language_genre_sums)],
        'layout': {
            'title': 'Average Message Length by Genre',
            'yaxis': {
                'title': "Avg Length (# of Words)"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre and log-scaled word-count charts.

    Reads the module-level ``df``.  The word count of a message is the
    number of whitespace-separated words; messages without any word are left
    out of the histogram.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # log10 of the words per message.  Messages with no words are dropped
    # first because log10(0) is -inf, which cannot be binned.
    raw_word_counts = df['message'].apply(lambda x: len(x.split()))
    word_counts = np.log10(raw_word_counts[raw_word_counts > 0])

    # create visuals
    graphs = [
        {
            'data': [
                Bar(
                    x=genre_names,
                    y=genre_counts
                )
            ],
            'layout': {
                'title': 'Distribution of Message Genres',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Genre"
                }
            }
        },
        {
            'data': [
                Histogram(
                    x=word_counts,
                )
            ],
            'layout': {
                'title': 'Word Counts of Messages',
                'yaxis': {
                    'title': "Count (number of messages)"
                },
                'xaxis': {
                    'title': "Word Counts (log10 scale)"
                }
            }
        }
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, labels-per-message and label-count charts.

    Reads the module-level ``df``.  Every column other than ``id``,
    ``message``, ``original`` and ``genre`` is treated as a label column.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Drop the non-label columns once and reuse the result for both sums.
    label_frame = df.drop(columns=['id', 'message', 'original', 'genre'])
    multiple_labels = label_frame.sum(axis=1)  # labels per message
    positive_labels = label_frame.sum()  # total per label

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Histogram(x=multiple_labels)],
        'layout': {
            'title': 'Histogram of Multiple Labels Instances',
            'yaxis': {
                'title': "Frequency"
            },
            'xaxis': {
                'title': "# of Multiple Labels"
            }
        }
    }, {
        'data': [Bar(x=positive_labels.index, y=positive_labels)],
        'layout': {
            'title': 'Distribution of positive labels',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Output Labels"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with category-count and categories-per-message charts.

    Reads the module-level ``df``; the columns from position 4 onward are
    the category columns.  Categories are ordered by how often the value 1
    occurs in them, most frequent first.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    category_frame = df.iloc[:, 4:]

    # One row per category, one column per distinct value found in it.
    counts = category_frame.apply(lambda column: column.value_counts()).T
    counts = counts.sort_values(by=1, ascending=False)

    # Display names: underscores become spaces, words are title-cased.
    categories = pd.Series(counts.index).str.replace('_', ' ').str.title()
    row_sums = df.iloc[:, 4:].sum(axis='columns')

    # Cycle through the default plotly palette, one colour per category.
    palette = plotly.colors.DEFAULT_PLOTLY_COLORS
    colors = [palette[position % len(palette)] for position in range(len(counts))]

    category_graph = {
        'data': [Bar(x=categories, y=counts[1], marker=dict(color=colors))],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category", 'tickangle': -90},
            'margin': {'b': 160},
        },
    }
    multiplicity_graph = {
        'data': [Histogram(x=row_sums)],
        'layout': {
            'title': 'Distribution of Counts of Multiple Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Number of categories in message"},
        },
    }
    graphs = [category_graph, multiplicity_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category-proportion and categories-per-message charts.

    Reads the module-level ``df`` without modifying it; the columns from
    position 4 onward are the category columns.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # proportion of categories, largest first
    category_frame = df[df.columns[4:]]
    category_counts = (category_frame.sum() / len(df)).sort_values(
        ascending=False)
    category_names = list(category_counts.index)

    # Categories per message, kept in a local Series: storing it as a new
    # column of the shared ``df`` would turn it into an extra "category" on
    # every later call and inflate these sums.
    categories_per_message = category_frame.sum(axis=1)

    # create visuals
    graphs = [{
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Genre"
            }
        }
    }, {
        'data': [Bar(x=category_names, y=category_counts)],
        'layout': {
            'title': 'Distribution of Categories',
            'yaxis': {
                'title': "Proportion"
            },
            'xaxis': {
                'title': "Categories"
            }
        }
    }, {
        'data': [Histogram(x=categories_per_message, )],
        'layout': {
            'title': 'Number of Categories per message',
            'yaxis': {
                'title': "Number of messages"
            },
            'xaxis': {
                'title': "Categories"
            }
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with categories-per-message, category and genre charts.

    Reads the module-level ``df`` without modifying it; the columns from
    position 4 onward are the category columns.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    # extract data needed for visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    categories = df.iloc[:, 4:]
    cat_names = list(categories)
    cat_counts = [df[cat_name].sum() for cat_name in cat_names]

    # Number of categories each message belongs to.  The histogram bins
    # these values directly, so x is the number of categories and y the
    # number of messages, as the axis titles say.  (Histogramming the
    # pre-aggregated group sizes would show how often each *count* occurs.)
    categories_per_message = categories.sum(axis=1)

    # create visuals
    # custom plotly graphs
    graphs = [{
        'data': [Histogram(
            x=categories_per_message,
            marker=dict(color='#4CB391'),
        )],
        'layout': {
            'title': 'Distribution of Messages in several categories',
            'yaxis': {
                'title': "Number of messages"
            },
            'xaxis': {
                'title': "Number of included categories"
            },
        }
    }, {
        'data': [Bar(x=cat_names, y=cat_counts, marker=dict(color='#4CB391'))],
        'layout': {
            'title': 'Distribution of Message Categories',
            'yaxis': {
                'title': "Count"
            },
            'xaxis': {
                'title': "Category"
            }
        }
    }, {
        'data': [Pie(labels=genre_names, values=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
        }
    }]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with top-category, message-length and genre charts.

    Reads the module-level ``df``; the columns from position 4 onward are
    the category columns.  Message length is counted in whitespace-separated
    words, and lengths more than three standard deviations away from the
    mean are left out.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    bar_colour = '#031c57'

    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # Ten categories with the largest column sums.
    category_totals = df[df.columns[4:]].sum()
    cat_frequency = category_totals.sort_values(ascending=False)[:10]
    cats = cat_frequency.index

    # Words per message without the outliers.
    words_per_message = df.message.apply(lambda text: len(text.split()))
    deviation = (words_per_message - words_per_message.mean()).abs()
    is_outlier = deviation > 3 * words_per_message.std()
    msg_len = words_per_message[~is_outlier]

    category_graph = {
        'data': [Bar(x=cats, y=cat_frequency, marker=dict(color=bar_colour))],
        'layout': {
            'title': 'Top 10 Message Categories',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category"},
        },
    }
    length_graph = {
        'data': [Histogram(x=msg_len, marker=dict(color=bar_colour))],
        'layout': {
            'title': 'Distribution of Message Lengths',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Words/Message"},
        },
    }
    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts, marker=dict(color=bar_colour))],
        'layout': {
            'title': 'Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    graphs = [category_graph, length_graph, genre_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def index():
    """Render ``master.html`` with genre, category and message-length charts.

    Reads the module-level ``df``; the columns from position 5 onward are
    summed as categories, and message length is counted in characters.

    Returns:
        The result of ``render_template('master.html', ...)`` called with the
        graph ids and the JSON-encoded graphs.
    """
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    message_length = df['message'].apply(len)

    category_counts = df.iloc[:, 5:].sum()
    category_names = list(df.iloc[:, 5:].columns)

    genre_graph = {
        'data': [Bar(x=genre_names, y=genre_counts)],
        'layout': {
            'title': 'Distribution of Message Genres',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Genre"},
        },
    }
    category_graph = {
        'data': [Bar(x=category_names, y=category_counts)],
        'layout': {
            'title': 'Distribution of Category Counts',
            'yaxis': {'title': "Count"},
            'xaxis': {'title': "Category"},
        },
    }
    length_graph = {
        'data': [Histogram(x=message_length)],
        'layout': {
            'title': 'Length of Messages',
            'xaxis': {'title': "Message Length"},
        },
    }
    graphs = [genre_graph, category_graph, length_graph]

    # One DOM id per graph, plus the graphs encoded as JSON.
    ids = ["graph-{}".format(position) for position in range(len(graphs))]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('master.html', ids=ids, graphJSON=graphJSON)
def generate_continuous(self):
    """
    Generates the continuous report.

    For every feature that has at least one value made of numeric
    characters only, the descriptive statistics are updated and a graph is
    written to ./out/graph_continuous_<feature>.html.  The statistics are
    finally written to ./out/report_continuous.csv and printed.
    """
    print("Generating continuous report...")
    print()

    columns, statistics = self.__initialize_statistics_continuous()

    for feature in self.__data:
        # Keep the non-empty, purely numeric values, converted to int.
        stripped_values = (("%s" % raw).strip() for raw in self.__data[feature])
        feature_values = [
            int(text) for text in stripped_values if text and text.isnumeric()
        ]

        # Features without any such value are skipped.
        if not feature_values:
            continue

        self.__process_statistics_continuous(statistics, feature, feature_values)

        # The cardinality recorded last decides the kind of graph.
        cardinalities = statistics['Card.']
        latest_cardinality = cardinalities[len(cardinalities) - 1]

        if latest_cardinality >= 10:
            # High cardinality: histogram over the feature's stored values
            plot(
                [Histogram(x=self.__data[feature])],
                filename=("./out/graph_continuous_%s.html" % feature)
            )
        else:
            # Low cardinality: one bar per distinct value
            occurences = self.__count_occurences(feature_values)
            bar = Bar(
                x=[key for key, _ in occurences.items()],
                y=[total for _, total in occurences.items()]
            )
            plot([bar], filename=("./out/graph_continuous_%s.html" % feature))

    df = pandas.DataFrame(statistics)
    df = df.reindex(columns=columns)
    df.to_csv("./out/report_continuous.csv")

    print(df)
    print()
    print("Done generating continuous report.")
def calc_error_imp(GRSA, rhoAA, CapacityA, rhoAAp, CapacityAp, refx, refy, nm=4,
                   runNb=30, Cmin=3, Cmax=20, detailOn=True, lim=0):
    """Compare predicted with given capacities and histogram the errors.

    For each of the first ``runNb`` entries that is not ``0`` and whose
    ``span / Lav`` exceeds 5, a capacity is predicted from the reference
    curve and the percentage error against ``CapacityA[i]`` is computed.  A
    value enters the statistics and the histogram only when its given
    capacity lies in ``[Cmin, Cmax]`` and its relative decrease with respect
    to the capacity interpolated from ``(rhoAAp[i], CapacityAp[i])`` is
    greater than ``lim``.  The maximum, minimum and average absolute error
    and the number of accepted values are always printed.

    Args:
        GRSA: indexable collection whose entries are either ``0`` or objects
            providing ``gN``, ``span``, ``height``, ``Lav``, ``secT`` and
            ``nbnBns``.
        rhoAA: per-entry values looked up on the curves.
        CapacityA: array indexed as ``CapacityA[i, j]``.
        rhoAAp, CapacityAp: per-entry curves handed to ``np.interp`` to
            obtain the capacity that ``CapacityA`` is compared with.
        refx, refy: reference curve handed to ``np.interp``.
        nm: number of values evaluated per entry.
        runNb: number of entries to process.
        Cmin, Cmax: inclusive bounds a given capacity must satisfy.
        detailOn: when true, print one table row per processed entry.
        lim: lower bound (exclusive) on the relative capacity decrease.

    Returns:
        A ``Figure`` with the histogram of the accepted errors.
    """
    errMax = 0
    errMin = 0
    errAv = 0
    count = 0
    errV = []

    if detailOn:
        print(' Nb - L g LpH Lav - L/Lav: error')
        print(' [-] [m] [-] [-] [m] [-] sec1 sec2 sec3 sec4')
        print('-----------------------------------------------------------')

    for i in range(runNb):
        GRS = GRSA[i]
        if GRS != 0 and GRS.span / GRS.Lav > 5:
            perf = np.interp(rhoAA[i], rhoAAp[i], CapacityAp[i])
            decrese = (perf - CapacityA[i]) / perf
            cc = (GRS.span / GRS.height - 12) ** 2 / 51 + 0.92
            pred = np.interp(rhoAA[i], refx, refy)
            predCapacity = pred * GRS.secT * GRS.Lav * cc * 1000 * GRS.nbnBns / GRS.span ** 2
            err = (CapacityA[i] - predCapacity) / CapacityA[i] * 100

            if detailOn:
                print('{:4d} - {:d} {:.0f} {:2.0f} {:4.1f} - {:5.1f}: '.format(
                    i, GRS.span, GRS.gN, GRS.span / GRS.height, GRS.Lav,
                    GRS.span / GRS.Lav), end="", flush=True)

            for j in range(nm):
                accepted = (Cmin <= CapacityA[i, j] <= Cmax
                            and decrese[j] > lim)
                if accepted:
                    count += 1
                    if err[j] > errMax:
                        errMax = err[j]
                    if err[j] < errMin:
                        errMin = err[j]
                    # The average is taken over absolute errors.
                    errAv = errAv + abs(err[j])
                    errV.append(err[j])
                if detailOn:
                    cell = '{:5.1f}'.format(err[j])
                    if not accepted:
                        # Rejected values are shown highlighted.
                        cell = bcolors.WARNING + cell + bcolors.ENDC
                    print(cell, end="", flush=True)

            if detailOn:
                print('')

    print('Max error: {:4.1f}%'.format(errMax))
    print('Min error: {:4.1f}%'.format(errMin))  # overestimated
    # Without any accepted value there is no average; report NaN rather than
    # dividing by zero.
    average = errAv / count if count else float('nan')
    print('Average error: {:4.1f}%'.format(average))
    print(count)

    data = [Histogram(x=errV, xbins=dict(start=-29, end=29, size=2))]
    layout = Layout(bargap=0.05,
                    xaxis1=dict(range=(-30, 30), tick0=-30., dtick=2., title='Felirat'),
                    )
    fig = Figure(data=data, layout=layout)
    return fig
def calc_error_Omentes(GRSA, rhoAA, CapacityA, refx, refy, nm=4, runNb=30, Cmin=3,
                       Cmax=20, detailOn=True):
    """Compare predicted with given capacities and histogram the errors.

    For each of the first ``runNb`` entries whose ``span / Lav`` exceeds 7,
    a capacity is predicted from the reference curve, scaled by a
    coefficient that depends on ``GeomType``, and the percentage error
    against ``CapacityA[i]`` is computed.  Only errors whose given capacity
    lies in ``[Cmin, Cmax]`` enter the statistics and the histogram.  The
    maximum, minimum and average error and the number of accepted values
    are always printed.

    Args:
        GRSA: indexable collection of objects providing ``GeomType``,
            ``gN``, ``span``, ``height``, ``Lav``, ``secT`` and ``nbnBns``.
        rhoAA: per-entry values looked up on the reference curve.
        CapacityA: array indexed as ``CapacityA[i, j]``.
        refx, refy: reference curve handed to ``np.interp``.
        nm: number of values evaluated per entry.
        runNb: number of entries to process.
        Cmin, Cmax: inclusive bounds a given capacity must satisfy.
        detailOn: when true, print one table row per processed entry.

    Returns:
        A ``Figure`` with the histogram of the accepted errors.
    """
    errMax = 0
    errMin = 0
    errAv = 0
    count = 0
    errV = []

    if detailOn:
        print(' Nb - L g LpH Lav - L/Lav: error')
        print(' [-] [m] [-] [-] [m] [-] sec1 sec2 sec3 sec4')
        print('-----------------------------------------------------------')

    for i in range(runNb):
        GRS = GRSA[i]
        if GRS.span / GRS.Lav > 7:
            # NOTE(review): ``c`` is only assigned for GeomType 1 and 2; any
            # other type reuses the value of the previous entry (or fails on
            # the first one) - confirm that no other type can occur.
            if GRS.GeomType == 1:
                c = -0.064 * GRS.span / GRS.height + 1.64
                if i in range(48, 54):
                    c = -0.064 * 8 + 1.64  # L/H=8
            if GRS.GeomType == 2:
                c = -0.073 * GRS.span / GRS.height + 1.74

            pred = np.interp(rhoAA[i], refx, refy)
            predCapacity = pred * GRS.secT * GRS.Lav * c * 1000 * GRS.nbnBns / GRS.span ** 2
            err = (CapacityA[i] - predCapacity) / CapacityA[i] * 100

            if detailOn:
                print('{:4d} - {:d} {:.0f} {:2.0f} {:4.1f} - {:5.1f}: '.format(
                    i, GRS.span, GRS.gN, GRS.span / GRS.height, GRS.Lav,
                    GRS.span / GRS.Lav), end="", flush=True)

            for j in range(nm):
                accepted = Cmin <= CapacityA[i, j] <= Cmax
                if accepted:
                    count += 1
                    if err[j] > errMax:
                        errMax = err[j]
                    if err[j] < errMin:
                        errMin = err[j]
                    errAv = errAv + err[j]
                    errV.append(err[j])
                if detailOn:
                    cell = '{:5.1f}'.format(err[j])
                    if not accepted:
                        # Values outside [Cmin, Cmax] are shown highlighted.
                        cell = bcolors.WARNING + cell + bcolors.ENDC
                    print(cell, end="", flush=True)

            if detailOn:
                print('')

    print('Max error: {:4.1f}%'.format(errMax))
    print('Min error: {:4.1f}%'.format(errMin))  # overestimated
    # Without any accepted value there is no average; report NaN rather than
    # dividing by zero.
    average = errAv / count if count else float('nan')
    print('Average error: {:4.1f}%'.format(average))
    print(count)

    data = [Histogram(x=errV, xbins=dict(start=-49, end=49, size=2))]
    layout = Layout(bargap=0.05,
                    xaxis1=dict(range=(-50, 50), tick0=-50., dtick=2., title='Felirat'),
                    )
    fig = Figure(data=data, layout=layout)
    return fig
def showPlotlyhistogram(self):
    """Plot the histogram of ``fd``'s log column with plotly offline.

    The bins run from ``int(fd.fmin)`` to ``int(fd.fmax)`` in steps of
    ``fd.binwidth``; the title ends with ``self.cellid``.
    """
    # Plotly Offline
    log_values = fd.data[fd.logcolumn]
    bins = dict(start=int(fd.fmin), end=int(fd.fmax), size=fd.binwidth)
    trace = Histogram(x=log_values, xbins=bins)
    figure = {
        "data": [trace],
        "layout": Layout(title="Log10(D) histogram for " + self.cellid),
    }
    offline.plot(figure)
def create_data(self):
    """Return the histogram trace of ``self.kamervraag_durations`` in a list.

    Automatic binning is switched off; the bins have width 1 and run from
    0 to 100.
    """
    bar_outline = dict(color=COLOR_PRIMARY, width=2)
    bar_style = dict(color=COLOR_INFO, line=bar_outline)
    duration_trace = Histogram(
        x=self.kamervraag_durations,
        autobinx=False,
        xbins=dict(start=0, end=100, size=1),
        marker=bar_style,
    )
    return [duration_trace]
def plotResults(ranksArrays, ranks, maxRank, testName="JUnit Test"):
    """
    Plot a histogram of the ranks.

    Signs in to plotly with the PLOTLY_USERNAME and PLOTLY_API_KEY
    environment variables and uploads one overlaid histogram per model.

    @param ranksArrays    (dict)    Keys: model names. Values: TP ranks, as
                                    an array providing ``mean()``.
    @param ranks          (dict)    Keys: model names. Values: Averaged TP
                                    ranks. Not used by this function.
    @param maxRank        (int)     Highest rank of TP possible.
    @param testName       (str)     Prefix of the plot title.

    @return               (str)     Plot URLs.
    """
    py.sign_in(os.environ["PLOTLY_USERNAME"], os.environ["PLOTLY_API_KEY"])
    colors = [
        "rgba(93, 164, 214, 0.5)", "rgba(255, 144, 14, 0.5)",
        "rgba(44, 160, 101, 0.5)", "rgba(255, 65, 54, 0.5)",
        "rgba(207, 114, 255, 0.5)", "rgba(193, 42, 72, 0.5)"
    ]

    histogramTraces = []
    # items() is available on Python 2 and 3 dicts alike, whereas
    # iteritems() only exists on Python 2.
    for i, (modelName, allRanks) in enumerate(ranksArrays.items()):
        # Display distribution stats in legend
        mean = round(allRanks.mean(), 2)
        sk = round(skew(allRanks), 2)

        # Setup histogram for this model
        histogramTraces.append(
            Histogram(
                y=allRanks,
                name="{}: ({}, {})".format(modelName, mean, sk),
                autobiny=False,
                ybins=dict(
                    start=0.0,
                    end=maxRank,
                    size=1.0,
                ),
                # Reuse the palette from the start when there are more
                # models than colours instead of running off its end.
                marker=dict(color=colors[i % len(colors)], ),
                opacity=0.7,
            ))

    histogramLayout = Layout(
        title="{} - Where are the True Positives?".format(testName),
        xaxis=dict(title="Count", ),
        yaxis=dict(
            title="Rank of TPs",
            range=[maxRank, 0],
        ),
        barmode="overlay",
        showlegend=True,
    )

    histogramFig = Figure(data=histogramTraces, layout=histogramLayout)
    histogramURL = py.plot(histogramFig)

    return histogramURL