def create_treechart(labels=None, values=None, parents=None, title='', size=6, colors=None):
    """Build a card-styled column containing a Plotly treemap.

    Args:
        labels: Treemap node labels. ``None`` (the default) means no nodes.
        values: Node values, parallel to ``labels``. ``None`` means empty.
        parents: Parent label of each node, parallel to ``labels``.
            ``None`` means empty.
        title: Heading rendered above the chart. It is also used, with
            spaces replaced by dashes, to build the graph component id.
        size: Suffix appended to the ``column is-`` CSS class.
        colors: Marker colors applied to the treemap trace. ``None`` means
            empty.

    Returns:
        An ``html.Div`` wrapping the card, its heading and the ``dcc.Graph``.
    """
    # Use None sentinels instead of shared mutable list defaults; each call
    # gets its own fresh list.
    labels = [] if labels is None else labels
    values = [] if values is None else values
    parents = [] if parents is None else parents
    colors = [] if colors is None else colors

    figure = go.Figure(
        go.Treemap(labels=labels,
                   values=values,
                   parents=parents,
                   root_color="lightblue"))
    figure.update_layout(plot_bgcolor=plot_colors['background-color'],
                         paper_bgcolor=plot_colors['background-color'])
    figure.update_traces(
        marker=dict(colors=colors, line=dict(color='#000000', width=2)))

    graph_id = 'tree-chart-' + str(title).replace(' ', '-')
    content = html.Div(
        [
            html.H4(title, className='subtitle is-4 has-text-centered is-bold'),
            dcc.Graph(id=graph_id, figure=figure, config=global_graph_config),
        ],
        className='content',
    )
    card = html.Div(
        [
            html.Div(content, className='card-content'),
        ],
        className='card',
        style={'background-color': 'rgb(244, 244, 244)'})
    return html.Div(card, className='column is-' + str(size))
def treeMapGraph(courtTable, key="Jurors"):
    """Serialize a treemap of one per-court metric to Plotly JSON.

    Args:
        courtTable: Mapping of court name to a mapping of metrics; the
            entry ``courtTable[name][key]`` becomes the node value.
        key: Metric to plot. It is also used as the figure title and in
            the hover text.

    Returns:
        The figure encoded as a JSON string with ``PlotlyJSONEncoder``.
    """
    court_names = list(courtTable.keys())

    # First pass: look up the parent id of every court by name.
    parent_ids = []
    for court_name in court_names:
        court = Court.query.filter(Court.name == court_name).first()
        parent_ids.append(court.parent)

    # Second pass: turn parent ids into display names; a court without a
    # parent is attached to the treemap root ("").
    parent_names = []
    for parent_id in parent_ids:
        if parent_id is None:
            parent_names.append("")
        else:
            parent_names.append(Court(id=parent_id).map_name)

    metric_values = []
    for court_name in court_names:
        metric_values.append(courtTable[court_name][key])

    hover = ('<b>%{label}</b><br>' + key +
             ': %{value}<br>Percentage of Parent Court: %{percentParent:.2%}<br>Percentage of General Court: %{percentRoot:.2%}<br>')
    trace = go.Treemap(
        # ids=labels,
        labels=court_names,
        parents=parent_names,
        values=metric_values,
        branchvalues="total",
        hovertemplate=hover,
    )
    fig = go.Figure(trace)

    frame_margin = {'l': 10, 'b': 80, 't': 30, 'r': 30}
    fig['layout'].update(title=key,
                         height=300,
                         margin=frame_margin,
                         legend={'orientation': 'h'})
    return json.dumps(fig, cls=PlotlyJSONEncoder)
def ontology_figures(ontology_data):
    """Create a treemap and a sunburst figure from the same hierarchy.

    Args:
        ontology_data: Mapping with ``'labels'`` and ``'parents'`` entries
            that are passed to both traces.

    Returns:
        A two-element list: ``[treemap_figure, sunburst_figure]``.
    """
    node_labels = ontology_data['labels']
    node_parents = ontology_data['parents']

    sunburst = go.Figure(
        go.Sunburst(labels=node_labels, parents=node_parents, maxdepth=2))
    treemap = go.Figure(go.Treemap(labels=node_labels, parents=node_parents))

    # Both figures share exactly the same layout settings, which are read
    # from module-level names.
    shared_layout = dict(
        margin=spacing,
        height=height,
        polar_bgcolor=bg_color,
        paper_bgcolor=bg_color,
        font_size=font_size,
        font_color=font_color,
    )
    for figure in (sunburst, treemap):
        figure.update_layout(**shared_layout)

    return [treemap, sunburst]
def top_stakers_chart(data: dict):
    """Build a Dash graph showing each staker's share of the summed stake.

    A synthetic ``'Total'`` node is added as the single root so that every
    entry of ``data`` becomes one of its children.

    Args:
        data: Mapping of staker label to staked amount.

    Returns:
        A ``dcc.Graph`` with id ``'top-stakers'`` wrapping the treemap.
    """
    staker_labels = list(data.keys())
    staked_amounts = list(data.values())
    total_staked = sum(staked_amounts)

    # add Total entry as root element
    treemap_labels = staker_labels + ['Total']
    treemap_values = staked_amounts + [total_staked]
    # set parent of Total entry to be root ('')
    treemap_parents = ['Total'] * len(data) + ['']

    fig = go.Figure(
        data=go.Treemap(
            branchvalues="total",
            labels=treemap_labels,
            name='',
            parents=treemap_parents,
            values=treemap_values,
            textinfo='none',
            hovertemplate=
            "<b>%{label} </b> <br> Stake Size: %{value:,.2f} NU<br> % of Network: %{percentRoot:.3% %}",
            marker=go.treemap.Marker(colors=list(data.keys()),
                                     colorscale='Viridis',
                                     line={"width": 1}),
            pathbar=dict(visible=False),
        ),
        layout=go.Layout(
            # Count only the real stakers: the synthetic 'Total' root that
            # was appended above must not inflate the number in the title.
            title=f'Top Stakers ({len(data)})',
            showlegend=False,
            font=dict(family="monospace", size=11, color="slategrey"),
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)',
            autosize=True,
            width=None,
            height=None,
        ))

    graph = dcc.Graph(figure=fig,
                      id='top-stakers',
                      config=GRAPH_CONFIG,
                      style={
                          'width': '100%',
                          'height': '100%'
                      })
    return graph
def update_treemap(clickData):
    """Build a treemap for the group selected by a click event.

    The group number is parsed from the clicked point's text: the part
    before the first ``<br>`` is taken, and whatever follows its last
    ``:`` is converted to ``int``.

    Args:
        clickData: Click payload with a ``'points'`` list, or a falsy value
            when nothing has been clicked.

    Returns:
        A one-element tuple holding the figure dict when ``clickData`` is
        truthy, otherwise an empty list.
    """
    if not clickData:
        return []

    clicked_text = clickData['points'][0]['text']
    first_line = clicked_text.split('<br>')[0]
    label = int(first_line.split(':')[-1])

    # Keep only the rows whose 'micro' column equals the clicked number.
    selected = df_profile['micro'][df_profile['micro']['micro'] == label]
    sizes = list(selected['N'])
    names = list(selected['AbbrTitle'])

    trace = go.Treemap(labels=names, parents=[""] * len(names), values=sizes)
    layout = go.Layout({"margin": dict(t=10, b=10, l=5, r=5, pad=4)})
    figure = {"data": [trace], "layout": layout}
    return (figure,)
def update_classes_tree_chart(dept, slider_year):
    """Build treemap figure data of class counts per tenure category.

    Args:
        dept: Department identifier, used to form the ``DEPT#...``
            partition key.
        slider_year: Key into ``YEARS``; the entry's ``academic`` value is
            used as the parent of every node and its ``fiscal`` value
            bounds the queried sort-key range.

    Returns:
        A dict with ``'data'`` (a list with one treemap trace) and
        ``'layout'``.

    Raises:
        IndexError: If the query result has no item for one of the
            categories in ``tenure_categories``.
    """
    year_entry = YEARS.get(slider_year)
    chart_year = year_entry.academic
    data_year = year_entry.fiscal

    # NOTE(review): `table` looks like a DynamoDB table resource - confirm.
    query_values = {
        ':pk': f'DEPT#{dept}',
        ':lower': f'DATA#AGG#CLASSES#{data_year}',
        ':upper': f'DATA#AGG#CLASSES#{int(data_year) + 1}$',
    }
    resp = table.query(
        KeyConditionExpression='PK = :pk AND SK BETWEEN :lower AND :upper',
        ExpressionAttributeValues=query_values,
        ProjectionExpression='#c, ten_stat',
        ExpressionAttributeNames={'#c': 'count'},
        ScanIndexForward=True,
    )
    items = resp['Items']

    labels = []
    parents = []
    values = []
    for data_cat, chart_cat in tenure_categories.items():
        labels.append(chart_cat)
        parents.append(chart_year)
        matching_counts = [
            int(float(item.get('count')))
            for item in items
            if item.get('ten_stat') == data_cat
        ]
        # Only the first matching item contributes to the chart.
        values.append(matching_counts[0])

    treemap = go.Treemap(
        labels=labels,
        parents=parents,
        values=values,
        texttemplate='%{label}<br>%{percentRoot} (%{value})',
    )
    chart_layout = go.Layout(margin=margin(l=70), )
    return {'data': [treemap], 'layout': chart_layout}
def plotly_wordcloud(string):
    """Build word-cloud, frequency-bar and treemap figure data from text.

    Helper function from Plotly:
    https://github.com/plotly/dash-sample-apps/blob/master/apps/dash-nlp/app.py

    Args:
        string: Text to analyse; it is lower-cased before the word cloud
            is generated.

    Returns:
        A 3-tuple ``(wordcloud_figure, frequency_figure, treemap_figure)``
        of figure dicts. The bar chart and treemap use the first 30 layout
        entries, in reversed order.
    """
    cloud = WordCloud(stopwords=set(STOPWORDS), max_words=100, max_font_size=90)
    cloud.generate(string.lower())

    # Each layout entry unpacks as
    # ((word, freq), fontsize, position, orientation, color).
    placed = [
        (word, freq, position, color)
        for (word, freq), _fontsize, position, _orientation, color in cloud.layout_
    ]
    words = [item[0] for item in placed]
    freqs = [item[1] for item in placed]
    xs = [item[2][0] for item in placed]
    ys = [item[2][1] for item in placed]
    colors = [item[3] for item in placed]
    # Text size of each word is its frequency scaled by 80.
    sizes = [freq * 80 for freq in freqs]

    hover_labels = ["{0} - {1}".format(w, f) for w, f in zip(words, freqs)]
    cloud_trace = go.Scatter(
        x=xs,
        y=ys,
        textfont=dict(size=sizes, color=colors),
        hoverinfo="text",
        textposition="top center",
        hovertext=hover_labels,
        mode="text",
        text=words,
    )

    hidden_x_axis = {
        "showgrid": False,
        "showticklabels": False,
        "zeroline": False,
        "automargin": True,
        "range": [-100, 250],
    }
    hidden_y_axis = {
        "showgrid": False,
        "showticklabels": False,
        "zeroline": False,
        "automargin": True,
        "range": [-100, 450],
    }
    cloud_layout = go.Layout(
        {
            "xaxis": hidden_x_axis,
            "yaxis": hidden_y_axis,
            "margin": dict(t=20, b=20, l=10, r=10, pad=4),
            "hovermode": "closest",
        }
    )
    wordcloud_figure_data = {"data": [cloud_trace], "layout": cloud_layout}

    top_words = words[:30][::-1]
    top_freqs = freqs[:30][::-1]

    bar_trace = {
        "y": top_words,
        "x": top_freqs,
        "type": "bar",
        "name": "",
        "orientation": "h",
    }
    frequency_figure_data = {
        "data": [bar_trace],
        "layout": {"height": "550", "margin": dict(t=20, b=20, l=100, r=20, pad=4)},
    }

    treemap_trace = go.Treemap(
        labels=top_words, parents=[""] * len(top_words), values=top_freqs
    )
    treemap_layout = go.Layout({"margin": dict(t=10, b=10, l=5, r=5, pad=4)})
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}

    return wordcloud_figure_data, frequency_figure_data, treemap_figure
def plotly_wordcloud(data_frame):
    """Return figure data for a word cloud, a frequency bar chart and a treemap.

    Args:
        data_frame: Frame with a ``"Consumer complaint narrative"`` column;
            its non-null values are joined with spaces to form the corpus.

    Returns:
        A 3-tuple ``(wordcloud_figure, frequency_figure, treemap_figure)``
        of figure dicts, or three empty dicts when the column has no
        non-null values.
    """
    narratives = list(
        data_frame["Consumer complaint narrative"].dropna().values)
    if not narratives:
        return {}, {}, {}

    # join all documents in corpus
    corpus = " ".join(narratives)

    cloud = WordCloud(stopwords=set(STOPWORDS),
                      max_words=100,
                      max_font_size=90)
    cloud.generate(corpus)

    words, freqs, xs, ys, colors = [], [], [], [], []
    for (word, freq), _fontsize, position, _orientation, color in cloud.layout_:
        words.append(word)
        freqs.append(freq)
        # get the positions
        xs.append(position[0])
        ys.append(position[1])
        colors.append(color)

    # get the relative occurrence frequencies
    scaled_sizes = [freq * 80 for freq in freqs]

    scatter = go.Scatter(
        x=xs,
        y=ys,
        textfont=dict(size=scaled_sizes, color=colors),
        hoverinfo="text",
        textposition="top center",
        hovertext=[
            "{0} - {1}".format(w, f) for w, f in zip(words, freqs)
        ],
        mode="text",
        text=words,
    )

    def _blank_axis(upper):
        # Axis with grid, tick labels and zero line hidden.
        return {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, upper],
        }

    scatter_layout = go.Layout({
        "xaxis": _blank_axis(250),
        "yaxis": _blank_axis(450),
        "margin": dict(t=20, b=20, l=10, r=10, pad=4),
        "hovermode": "closest",
    })
    wordcloud_figure_data = {"data": [scatter], "layout": scatter_layout}

    # First 25 layout entries, reversed for the bar chart and treemap.
    top_words = list(reversed(words[:25]))
    top_freqs = list(reversed(freqs[:25]))

    frequency_figure_data = {
        "data": [{
            "y": top_words,
            "x": top_freqs,
            "type": "bar",
            "name": "",
            "orientation": "h",
        }],
        "layout": {
            "height": "550",
            "margin": dict(t=20, b=20, l=100, r=20, pad=4)
        },
    }

    treemap_trace = go.Treemap(labels=top_words,
                               parents=[""] * len(top_words),
                               values=top_freqs)
    treemap_layout = go.Layout({"margin": dict(t=10, b=10, l=5, r=5, pad=4)})
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}

    return wordcloud_figure_data, frequency_figure_data, treemap_figure
total = row['total'] title = row['original title'] parent = row['year'] labels.append(title) parents.append(parent) values.append(total) dropped.apply(divide_gross, axis=1) tree = go.Figure( go.Treemap( labels=labels, parents=parents, values=values, branchvalues='total', textinfo="label+value+percent parent", ), go.Layout(title='Rachel the Great and Powerful')) text = rachel_df['plot'].str.cat(sep=' ').lower() text = text.replace(':', '').replace('-', '').replace(',', '').replace( '.', '').strip(' ').replace('"', '').replace("'", '') stopwords = set(STOPWORDS) stopwords.update(["Anonymous", 'IMDb', ' ']) words = pd.Series(text.split(' ')).value_counts() words = words.loc[~words.index.isin(stopwords)] words = words[words.index != ''] words = words.to_frame('count').reset_index().rename({'index': 'word'}, axis=1)
def plotly_wordcloud(text):
    """Return figure data for a word cloud and a treemap of its top words.

    Args:
        text: Text from which the word cloud is generated.

    Returns:
        A 2-tuple ``(wordcloud_figure, treemap_figure)`` of figure dicts.
        When ``text`` contains no words at all, two empty dicts are
        returned so callers can always unpack two values.
    """
    # ``text.split(" ")`` always yields at least one element, so the former
    # ``len(...) < 1`` guard could never trigger. Split on any whitespace
    # instead, which gives an empty list for blank input.
    if not text.split():
        return {}, {}

    #mask = np.array(Image.open('assets/talk.png'))
    font_path = 'assets/MilkyNice-Clean.otf'
    word_cloud = WordCloud(stopwords=set(STOPWORDS),
                           background_color="white",
                           font_path=font_path,
                           max_words=2000,
                           max_font_size=256,
                           random_state=42,
                           #mask=mask, width=mask.shape[1],vheight=mask.shape[0]
                           )
    word_cloud.generate(text)

    # Each layout entry unpacks as
    # ((word, freq), fontsize, position, orientation, color); font size and
    # orientation are not needed for the figures built here.
    word_list = []
    freq_list = []
    x_arr = []
    y_arr = []
    color_list = []
    for (word, freq), _fontsize, position, _orientation, color in word_cloud.layout_:
        word_list.append(word)
        freq_list.append(freq)
        # get the positions
        x_arr.append(position[0])
        y_arr.append(position[1])
        color_list.append(color)

    # get the relative occurrence frequencies
    new_freq_list = [freq * 60 for freq in freq_list]

    trace = go.Scatter(
        x=x_arr,
        y=y_arr,
        textfont=dict(size=new_freq_list, color=color_list),
        hoverinfo="text",
        textposition="top center",
        hovertext=["{0} - {1}".format(w, f) for w, f in zip(word_list, freq_list)],
        mode="text",
        text=word_list,
    )

    layout = go.Layout(
        {
            "xaxis": {
                "showgrid": False,
                "showticklabels": False,
                "zeroline": False,
                "automargin": True,
                #"range": [-100, 250],
            },
            "yaxis": {
                "showgrid": False,
                "showticklabels": False,
                "zeroline": False,
                "automargin": True,
                #"range": [-100, 450],
            },
            "margin": dict(t=2, b=2, l=2, r=2, pad=1),
            "hovermode": "closest",
        }
    )

    wordcloud_figure_data = {"data": [trace], "layout": layout}

    # The treemap shows the first 60 layout entries in reversed order.
    word_list_top = word_list[:60]
    word_list_top.reverse()
    freq_list_top = freq_list[:60]
    freq_list_top.reverse()

    treemap_trace = go.Treemap(
        labels=word_list_top,
        parents=[""] * len(word_list_top),
        values=freq_list_top,
        marker=dict(colorscale='Blackbody'),
    )
    treemap_layout = go.Layout({"margin": dict(t=10, b=10, l=5, r=5, pad=4)})
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}

    return wordcloud_figure_data, treemap_figure
import pandas as pd
import plotly
import plotly.graph_objs as go
from plotly.offline import *

# Initialise plotly's offline notebook mode.
init_notebook_mode(connected=True)

# Two chains: A1 > A2 > A3 > A4 > A5 and B1 > B2. An empty parent string
# marks a top-level node.
labels = ["A1", "A2", "A3", "A4", "A5", "B1", "B2"]
parents = ["", "A1", "A2", "A3", "A4", "", "B1"]
values = ["11", "12", "13", "14", "15", "20", "30"]

fig = go.Figure(data=[
    go.Treemap(
        labels=labels,
        parents=parents,
        values=values,
        marker=dict(colorscale='Blues'),
    )
])

# Write the figure to a standalone HTML file.
plotly.offline.plot(fig, filename='example_tree.html')
def plotly_wordcloud(flag):
    """Return figure data for a word cloud and a word-frequency bar chart.

    Args:
        flag: Either ``1``, in which case the text is obtained from
            ``xyq_nlp.getCleanText()``, or the text itself.

    Returns:
        A 2-tuple ``(wordcloud_figure, frequency_figure)`` of figure dicts,
        or two empty dicts when the text is empty.
    """
    if flag == 1:
        text = xyq_nlp.getCleanText()
    else:
        text = flag
        # NOTE(review): debug output; assumed to belong to this branch -
        # confirm against the original indentation.
        print(flag)

    if len(text) < 1:
        return {}, {}

    word_cloud = WordCloud(stopwords=set(STOPWORDS),
                           max_words=100,
                           max_font_size=90)
    word_cloud.generate(text)

    # Each layout entry unpacks as
    # ((word, freq), fontsize, position, orientation, color); font size and
    # orientation are not used by either figure.
    word_list = []
    freq_list = []
    x_arr = []
    y_arr = []
    color_list = []
    for (word, freq), _fontsize, position, _orientation, color in word_cloud.layout_:
        word_list.append(word)
        freq_list.append(freq)
        # get the positions
        x_arr.append(position[0])
        y_arr.append(position[1])
        color_list.append(color)

    # get the relative occurrence frequencies
    new_freq_list = [freq * 80 for freq in freq_list]

    trace = go.Scatter(
        x=x_arr,
        y=y_arr,
        textfont=dict(size=new_freq_list, color=color_list),
        hoverinfo="text",
        textposition="top center",
        hovertext=[
            "{0} - {1}".format(w, f) for w, f in zip(word_list, freq_list)
        ],
        mode="text",
        text=word_list,
    )

    layout = go.Layout({
        "xaxis": {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, 250],
        },
        "yaxis": {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, 450],
        },
        "margin": dict(t=20, b=20, l=10, r=10, pad=4),
        "hovermode": "closest",
    })

    wordcloud_figure_data = {"data": [trace], "layout": layout}

    # The bar chart shows the first 20 layout entries in reversed order.
    word_list_top = word_list[:20]
    word_list_top.reverse()
    freq_list_top = freq_list[:20]
    freq_list_top.reverse()

    frequency_figure_data = {
        "data": [{
            "y": word_list_top,
            "x": freq_list_top,
            "type": "bar",
            "name": "",
            "orientation": "h",
        }],
        "layout": {
            "height": "400",
            "margin": dict(t=20, b=20, l=100, r=20, pad=4)
        },
    }

    # A treemap figure used to be built here but was never returned, so
    # that dead code has been dropped.
    return wordcloud_figure_data, frequency_figure_data
def make_frequency_and_treemap(data_frame):
    """Return bar-chart and treemap figure data for a remote text's top words.

    The text is downloaded from a fixed URL and passed to ``WordCloud``
    (at most 30 words); the first 25 layout entries, in reversed order,
    feed both figures.

    Args:
        data_frame: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A 2-tuple ``(frequency_figure, treemap_figure)`` of figure dicts.
    """
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(
            "https://raw.githubusercontent.com/SonQBChau/JSON/main/wordcloud.txt"
    ) as response:
        text = response.read().decode('utf-8')

    word_cloud = WordCloud(
        stopwords=set(STOPWORDS),
        max_words=30,
        max_font_size=90,
    )
    word_cloud.generate(text)

    # Each layout entry unpacks as
    # ((word, freq), fontsize, position, orientation, color). Only the word
    # and its frequency are needed: the scatter trace and layout for the
    # word cloud itself were built but never returned, so they are gone.
    word_list = []
    freq_list = []
    for (word, freq), _fontsize, _position, _orientation, _color in word_cloud.layout_:
        word_list.append(word)
        freq_list.append(freq)

    word_list_top = word_list[:25]
    word_list_top.reverse()
    freq_list_top = freq_list[:25]
    freq_list_top.reverse()

    frequency_figure_data = {
        "data": [{
            "y": word_list_top,
            "x": freq_list_top,
            "type": "bar",
            "name": "",
            "orientation": "h",
        }],
        "layout": {
            "height": "550",
            "margin": dict(t=20, b=20, l=100, r=20, pad=4)
        },
    }

    treemap_trace = go.Treemap(labels=word_list_top,
                               parents=[""] * len(word_list_top),
                               values=freq_list_top)
    treemap_layout = go.Layout({"margin": dict(t=10, b=10, l=5, r=5, pad=4)})
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}

    return frequency_figure_data, treemap_figure
# Domestic rows of the latest year in the data, summed per airline/terminal.
df_plot = df[(df['geo_summ'] == 'Domestic') & (df['year'] == df.year.max())]
df_plot = df_plot.groupby(['pub_airlines', 'terminal'])['pax_count'] \
    .sum().reset_index()

# Filter out outliers (keep only rows with more than 10000 passengers).
# The index is reset so that the rows appended below get fresh labels; on
# the filtered frame `len(df_plot) + k` could equal the label of a
# surviving row and silently overwrite it.
df_plot = df_plot[df_plot['pax_count'] > 10000].reset_index(drop=True)

# Add entries for parent node for tree map: each parent is a top-level
# node (empty parent string) with no passengers of its own.
for parent_label in ('Terminal 1', 'Terminal 2', 'Terminal 3',
                     'International'):
    df_plot.loc[len(df_plot)] = [parent_label, '', 0]

# Data
data = []
data.append(
    go.Treemap(labels=df_plot['pub_airlines'],
               values=df_plot['pax_count'],
               parents=df_plot['terminal']))

# Layout
fig_title = 'Domestic Passenger Traffic by Airline and Terminal'
layout = dict(title={
    'text': fig_title,
    'x': 0.5
},
              treemapcolorway=[
                  'rgb(153, 153, 255)', 'rgb(255, 153, 253)',
                  'rgb(255, 153, 153)', 'rgb(255, 204, 153)'
              ])

fig = go.Figure(data=data, layout=layout)
plotly.offline.plot(fig, filename='terminalpax_tree.html')
def generate_analysis_treemap(
    cls,
    location_name: Path,
    value_dict: IPathValueAnalysis,
    property_dict: Optional[IPathPropertyAnalysis],
    max_depth: int = 4,
):
    """Render a treemap of per-path values as an embeddable HTML ``div``.

    Args:
        location_name: Location shown in the plot title.
        value_dict: Iterable of paths that can also be indexed by path to
            obtain each node's value.
        property_dict: Optional per-path property used to color the nodes
            and label the color bar. When it is ``None`` or falsy the
            nodes are left without marker settings.
        max_depth: Passed to the treemap as ``maxdepth``.

    Returns:
        The value returned by ``plotly.offline.plot`` with
        ``output_type='div'``.
    """
    # logger.info("Generating treemap plot")
    file_id_list = [f'{i.absolute()}' for i in value_dict]

    if property_dict:
        # Only read the ticks when a property analysis was supplied; doing
        # it unconditionally raised AttributeError for ``None``.
        tickvals, ticktext = zip(*property_dict.ticks().items())
        markers_dict = dict(
            colors=[property_dict[i] for i in tqdm(value_dict)],
            showscale=True,
            colorscale=COLOR_SCALE,
            cmin=0,
            cmax=1,
            # cmax=max(entropy_dict.values()),
            colorbar=dict(title=f'{property_dict.name}'.capitalize(),
                          titleside='top',
                          tickmode='array',
                          tickvals=tickvals,
                          ticktext=ticktext,
                          ticks='outside'))
    else:
        markers_dict = None

    # Original plot information:
    plot_data = [
        go.Treemap(
            labels=[f'{i.name}' for i in value_dict],
            ids=file_id_list,
            parents=[f'{i.parent.absolute()}' for i in value_dict],
            values=[value_dict[i] for i in tqdm(value_dict)],
            marker=markers_dict,
            branchvalues='total',
            maxdepth=max_depth,
        ),
    ]

    # Without a property analysis there is no property name to show.
    if property_dict is not None:
        title_subject = f"{property_dict.name} treemap"
    else:
        title_subject = "treemap"
    title = (f"{title_subject} for ({location_name})").capitalize()

    # Set graph layout:
    layout = go.Layout(
        title=title,
        autosize=True,
    )

    # Generate result figure:
    # fig = go.Figure(data=plot_data, layout=layout)
    # logger.info('Plotting graph')
    div_data = plotly.offline.plot(
        {
            "data": plot_data,
            "layout": layout,
        },
        output_type='div',
        config=dict(responsive=True),
    )
    return div_data
def get_plots(cls, data: pd.DataFrame) -> Any:
    """Return word-cloud, frequency-bar and treemap figure data for entities.

    Args:
        data: Frame with an ``"entity"`` column; the value counts of that
            column are used as word frequencies.

    Returns:
        A 3-tuple ``(wordcloud_figure, frequency_figure, treemap_figure)``
        of figure dicts, or three empty dicts when there are no entities.
    """
    entities = dict(data["entity"].value_counts())
    if len(entities) < 1:
        return {}, {}, {}

    word_cloud = WordCloud(stopwords=set(cls.stopwords),
                           max_words=100,
                           max_font_size=90)
    word_cloud.generate_from_frequencies(entities)

    # Each layout entry unpacks as
    # ((word, freq), fontsize, position, orientation, color). Keep only the
    # words whose scaled size (freq * 80) exceeds 1, and filter every
    # per-word list together. Previously the positions were left
    # unfiltered, so x/y were longer than the text, color and size lists.
    word_list = []
    freq_list = []
    new_freq_list = []
    x_arr = []
    y_arr = []
    color_list = []
    for (word, freq), _fontsize, position, _orientation, color in word_cloud.layout_:
        scaled = freq * 80
        if not scaled > 1:
            continue
        word_list.append(word)
        freq_list.append(freq)
        new_freq_list.append(scaled)
        x_arr.append(position[0])
        y_arr.append(position[1])
        color_list.append(color)

    trace = go.Scatter(
        x=x_arr,
        y=y_arr,
        textfont=dict(size=new_freq_list, color=color_list),
        hoverinfo="text",
        textposition="top center",
        hovertext=[
            "{0} - {1}".format(w, f) for w, f in zip(word_list, freq_list)
        ],
        mode="text",
        text=word_list,
    )

    layout = go.Layout({
        "paper_bgcolor": 'rgba(0,0,0,0)',
        "plot_bgcolor": 'rgba(0,0,0,0)',
        "xaxis": {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, 250],
        },
        "yaxis": {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, 450],
        },
        # "margin": dict(t=20, b=20, l=10, r=10, pad=4),
        "margin": dict(t=10, b=10, l=5, r=5, pad=4),
        "hovermode": "closest",
    })

    wordcloud_figure_data = {"data": [trace], "layout": layout}

    # The bar chart and treemap use the first 25 kept words, reversed.
    word_list_top = word_list[:25]
    word_list_top.reverse()
    freq_list_top = freq_list[:25]
    freq_list_top.reverse()

    frequency_figure_data = {
        "data": [{
            "y": word_list_top,
            "x": freq_list_top,
            "type": "bar",
            "name": "",
            "orientation": "h",
        }],
        "layout": {
            "paper_bgcolor": 'rgba(0,0,0,0)',
            "plot_bgcolor": 'rgba(0,0,0,0)',
            'color': 'white',
            "height": "550",
            "margin": dict(t=20, b=20, l=100, r=20, pad=4)
        },
    }

    treemap_trace = go.Treemap(labels=word_list_top,
                               parents=[""] * len(word_list_top),
                               values=freq_list_top)
    treemap_layout = go.Layout({
        "paper_bgcolor": 'rgba(0,0,0,0)',
        "plot_bgcolor": 'rgba(0,0,0,0)',
        "margin": dict(t=10, b=10, l=5, r=5, pad=4)
    })
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}

    return wordcloud_figure_data, frequency_figure_data, treemap_figure
paper_bgcolor='#1a1a1a', showlegend=False, ) fig.update_yaxes(showgrid=False, tickfont=dict(color='white')) fig.update_xaxes(showgrid=False, tickfont=dict(color='white')) #treemap values = wc labels = w parents = v fig4 = go.Figure( go.Treemap(labels=labels, values=values, parents=parents, marker_colors=[ "darkblue", "royalblue", "lightblue", "red", "lightred", "lightgray", "lightblue" ])) fig4.update_layout(paper_bgcolor='#1a1a1a', title_text='Total Fights by Win By', font=dict(color='rgb(255,255,255)')) #class weight colors2 = ['blue', 'blue', 'red', 'red'] * 14 fig5 = go.Figure(go.Bar( x=a1, y=b1, marker_color=colors2, orientation='h', ))
def plotly_wordcloud(data_frame):
    """Return figure data for a bigram word cloud, bar chart and treemap.

    The non-null values of the ``"Article Text"`` column are joined and
    tokenized, tokens found in ``STOPWORDS`` are dropped, and the
    remaining tokens are paired into bigrams whose counts drive the word
    cloud.

    Args:
        data_frame: Frame with an ``"Article Text"`` column.

    Returns:
        A 3-tuple ``(wordcloud_figure, frequency_figure, treemap_figure)``
        of figure dicts, or three empty dicts when the column has no
        non-null values.
    """
    article_texts = list(data_frame["Article Text"].dropna().values)

    if len(article_texts) < 1:
        return {}, {}, {}

    # join all documents in corpus
    text = " ".join(article_texts)

    stopwords_wc = set(STOPWORDS)
    # Tokenize once; a second, unused tokenization of the whole corpus was
    # removed.
    tokens = nltk.word_tokenize(text)
    text1 = nltk.Text(tokens)
    text_content = [word for word in text1 if word not in stopwords_wc]
    bigrams_list = list(nltk.bigrams(text_content))
    dictionary2 = [' '.join(tup) for tup in bigrams_list]

    # Count how often each bigram occurs.
    vectorizer = CountVectorizer(ngram_range=(2, 2))
    bag_of_words = vectorizer.fit_transform(dictionary2)
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx])
                  for word, idx in vectorizer.vocabulary_.items()]
    words_freq = sorted(words_freq, key=lambda x: x[1], reverse=True)
    words_dict = dict(words_freq)

    WC_height = 1000
    WC_width = 1500
    WC_max_words = 200
    word_cloud = WordCloud(max_words=WC_max_words,
                           height=WC_height,
                           width=WC_width,
                           stopwords=stopwords_wc)
    word_cloud.generate_from_frequencies(words_dict)

    # Each layout entry unpacks as
    # ((word, freq), fontsize, position, orientation, color); font size and
    # orientation are not used by any of the figures.
    word_list = []
    freq_list = []
    x_arr = []
    y_arr = []
    color_list = []
    for (word, freq), _fontsize, position, _orientation, color in word_cloud.layout_:
        word_list.append(word)
        freq_list.append(freq)
        # get the positions
        x_arr.append(position[0])
        y_arr.append(position[1])
        color_list.append(color)

    # get the relative occurrence frequencies
    new_freq_list = [freq * 80 for freq in freq_list]

    trace = go.Scatter(
        x=x_arr,
        y=y_arr,
        textfont=dict(size=new_freq_list, color=color_list),
        hoverinfo="text",
        textposition="top center",
        hovertext=[
            "{0} - {1}".format(w, f) for w, f in zip(word_list, freq_list)
        ],
        mode="text",
        text=word_list,
    )

    layout = go.Layout({
        "xaxis": {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, 250],
        },
        "yaxis": {
            "showgrid": False,
            "showticklabels": False,
            "zeroline": False,
            "automargin": True,
            "range": [-100, 450],
        },
        "margin": dict(t=20, b=20, l=10, r=10, pad=4),
        "hovermode": "closest",
    })

    wordcloud_figure_data = {"data": [trace], "layout": layout}

    # The bar chart and treemap use the first 25 layout entries, reversed.
    word_list_top = word_list[:25]
    word_list_top.reverse()
    freq_list_top = freq_list[:25]
    freq_list_top.reverse()

    frequency_figure_data = {
        "data": [{
            "y": word_list_top,
            "x": freq_list_top,
            "type": "bar",
            "name": "",
            "orientation": "h",
        }],
        "layout": {
            "height": "550",
            "margin": dict(t=20, b=20, l=100, r=20, pad=4)
        },
    }

    treemap_trace = go.Treemap(labels=word_list_top,
                               parents=[""] * len(word_list_top),
                               values=freq_list_top)
    treemap_layout = go.Layout({"margin": dict(t=10, b=10, l=5, r=5, pad=4)})
    treemap_figure = {"data": [treemap_trace], "layout": treemap_layout}

    return wordcloud_figure_data, frequency_figure_data, treemap_figure
mynew["class1name"].unique()) # In[ ]: assert len(labels) == len(parents) == len(values) == len(marker_colors) # In[ ]: import plotly.graph_objs as go trace1 = go.Treemap( labels=labels, values=values, parents=parents, marker_colors=marker_colors, pathbar={"visible": False}, insidetextfont={ "size": 1, "color": marker_colors }, ) data = [trace1] layout = go.Layout(margin={ "b": 0, "l": 0, "r": 0, "pad": 0, "t": 0 },