def plot_covariance_returns_correlation(correlation, title):
    """Save a dendrogram-plus-heatmap figure to HTML and display a link to it.

    Two dendrograms are built from ``correlation`` (orientations ``'bottom'``
    and ``'right'``); their traces are combined with a heatmap of
    ``correlation.values`` in one 800x800 figure.  The figure is written to
    ``graphs/<sanitized title>.html`` without opening a browser, and an HTML
    snippet linking to that file is displayed.

    Args:
        correlation: Table exposing ``index``, ``columns`` and ``values``
            (presumably a square pandas DataFrame -- confirm with callers).
        title: Figure title; also the source of the output file name.
    """
    output_file = 'graphs/{}.html'.format(_sanatize_string(title))

    # Traces of the top dendrogram are bound to the secondary y axis.
    top_tree = ff.create_dendrogram(correlation, orientation='bottom')
    for trace in top_tree['data']:
        trace['yaxis'] = 'y2'

    # Traces of the left dendrogram are bound to the secondary x axis.
    left_tree = ff.create_dendrogram(correlation, orientation='right')
    for trace in left_tree['data']:
        trace['xaxis'] = 'x2'

    hover_text = _generate_hover_text(
        correlation.index,
        correlation.columns,
        correlation.values,
        'Ticker 2',
        'Ticker 1',
        'Correlation')

    # The heatmap cells are positioned on the dendrogram leaf coordinates.
    heatmap = go.Heatmap(
        x=top_tree['layout']['xaxis']['tickvals'],
        y=left_tree['layout']['yaxis']['tickvals'],
        z=correlation.values,
        zauto=False,
        zmax=1.0,
        zmin=-1.0,
        text=hover_text,
        hoverinfo='text')

    traces = list(top_tree['data'])
    traces.extend(left_tree['data'])
    traces.append(heatmap)

    primary_axis_style = {
        'showgrid': False,
        'showline': False,
        'zeroline': False,
        'showticklabels': False,
        'ticks': ""
    }
    # Only referenced by the disabled secondary-axis configuration below.
    secondary_axis_style = {
        'showgrid': False,
        'zeroline': False,
        'showticklabels': False
    }

    figure = go.Figure(
        data=traces,
        layout=go.Layout(title=title, showlegend=False, width=800,
                         height=800))
    figure['layout']['xaxis'].update(
        dict(primary_axis_style, domain=[.15, 1]))
    figure['layout']['yaxis'].update(
        dict(primary_axis_style, domain=[0, .85]))
    # figure['layout']['xaxis2'].update({'domain': [0, .15]})
    # figure['layout']['xaxis2'].update(secondary_axis_style)
    # figure['layout']['yaxis2'].update({'domain': [.825, .975]})
    # figure['layout']['yaxis2'].update(secondary_axis_style)

    offline_py.plot(figure, filename=output_file, auto_open=False)
    link_html = (
        'The graph for {} is too large. You can view it <a href="{}" target="_blank">here</a>.'
        .format(title, output_file))
    display(HTML(link_html))
def clustermap(df):
    """Reorder ``df`` by the leaf order of row and column dendrograms.

    One dendrogram is built over ``df`` (orientation ``'right'``, labelled
    with ``df.index``) and one over ``df.T`` (orientation ``'bottom'``,
    labelled with ``df.T.index``).  The tick texts of both figures are then
    used to select the columns and rows of ``df`` in dendrogram order.

    Args:
        df: Table supporting ``.T``, ``.index``, ``[...]`` and ``.loc``
            (presumably a pandas DataFrame).

    Returns:
        dict with keys ``'heatmap'`` (the reordered table), ``'arch_dendro'``
        (dendrogram over ``df``) and ``'other_dendro'`` (dendrogram over
        ``df.T``).
    """
    row_tree = ff.create_dendrogram(df, orientation='right', labels=df.index)
    row_order = row_tree['layout']['yaxis']['ticktext']

    transposed = df.T
    column_tree = ff.create_dendrogram(
        transposed, orientation='bottom', labels=transposed.index)
    column_order = column_tree['layout']['xaxis']['ticktext']

    # Columns first, then rows, exactly as the leaf labels are listed.
    columns_reordered = df[column_order]
    fully_reordered = columns_reordered.loc[row_order]

    return {
        'heatmap': fully_reordered,
        'arch_dendro': row_tree,
        'other_dendro': column_tree,
    }
def dendrogram(nd, sample_list, p_cat_spec):
    """Build a left-oriented dendrogram of ``nd`` plus reference spectra.

    The spectra table located via ``extra_tools.get_data_file(p_cat_spec)``
    is read with its first column as index; each of its columns becomes one
    extra row stacked under ``nd`` and is labelled ``GOLD-<COLUMN>`` in upper
    case.  Distances come from ``extra_tools.PSD_sym_KL`` and the linkage is
    ``hc.centroid``.

    Note:
        ``sample_list`` is extended in place with the reference labels, so
        the caller's list grows on every call.

    Args:
        nd: 2-D array of observations, one row per entry of ``sample_list``.
        sample_list: Mutable list of labels for the rows of ``nd``.
        p_cat_spec: Identifier passed to ``extra_tools.get_data_file``.

    Returns:
        The plotly dendrogram figure with size, font, margin and x-axis title
        configured.
    """
    # load categorical spectra
    f_cat_spec = extra_tools.get_data_file(p_cat_spec)
    cat_spec = pd.read_table(f_cat_spec, index_col=0)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and exists in every pandas version.
    nd_spec = cat_spec.values.T
    sample_spec = [
        'GOLD-{}'.format(c).upper() for c in cat_spec.columns.tolist()
    ]

    nd = np.vstack((nd, nd_spec))
    sample_list.extend(sample_spec)

    def _PSD_dist(nd):
        return hc.distance.pdist(nd, extra_tools.PSD_sym_KL)

    dend = ff.create_dendrogram(nd,
                                labels=sample_list,
                                distfun=_PSD_dist,
                                linkagefun=hc.centroid,
                                orientation='left')
    dend['layout'].update({
        'width': 1500,
        'height': 800,
        'font': dict(size=18),
        # A plain dict replaces the deprecated go.Margin helper and is
        # accepted by every plotly version.
        'margin': dict(l=450),
    })
    dend['layout']['xaxis'].update({'title': 'KL Divergence'})
    return dend
def generate_dendrogram(reducedX, new_labels, dendroname):
    """Create a left-oriented dendrogram and plot it under ``dendroname``.

    A progress message is printed after the figure is created, before the
    plot call and after it.

    Args:
        reducedX: Observations handed to ``ff.create_dendrogram``.
        new_labels: Leaf labels for the observations.
        dendroname: Value forwarded to ``plot`` as ``filename``.
    """
    figure = ff.create_dendrogram(
        reducedX,
        labels=new_labels,
        orientation='left',
    )
    print("dendogram created\n")

    figure_size = {'width': 1000, 'height': 800}
    figure['layout'].update(figure_size)

    print("plotting started\n")
    plot(figure, filename=dendroname)
    print("plotting done\n")
def display_tree(filtered_data, words):
    """Return a Dash ``Div`` holding a dendrogram of location columns.

    When ``words`` is empty nothing is built and ``None`` is returned.
    Otherwise the columns ``'kraj'``, ``'region'`` and ``'miejscowość'`` are
    encoded as integers (each distinct value is replaced by the position of
    its last occurrence in the column) and the encoded table is clustered.
    Leaves are labelled with the original ``'miejscowość'`` values.  The
    encoded table is printed before the figure is created.

    Args:
        filtered_data: Data convertible with ``pd.DataFrame``; must contain
            the columns ``'takson w bazie'``, ``'kraj'``, ``'region'`` and
            ``'miejscowość'``.
        words: Sized collection; only its length is inspected.

    Returns:
        ``html.Div`` wrapping a ``dcc.Graph`` with id ``'dendro_map'``, or
        ``None`` when ``words`` is empty.
    """
    if not len(words):
        return None

    frame = pd.DataFrame(filtered_data)
    # Read but not used afterwards; the lookup is kept so that a missing
    # column still fails here.
    taxa = frame['takson w bazie']

    location_columns = ['kraj', 'region', 'miejscowość']
    frame = frame[location_columns]
    # Captured before encoding so the leaf labels keep the original values.
    places = frame['miejscowość']

    for column in location_columns:
        # Later occurrences overwrite earlier ones: value -> last position.
        codes = {
            value: position
            for position, value in enumerate(frame[column].values)
        }
        for value, code in codes.items():
            frame[column] = frame[column].replace(value, code)

    print(frame)
    figure = ff.create_dendrogram(frame, labels=places.values)
    return html.Div([dcc.Graph(id='dendro_map', figure=figure)])
def update_dendro_graph(num_clusters, selectedData, selected_metrics, norm):
    """Build the dendrogram figure for the currently visible rows.

    Rows of ``app._df`` flagged ``visible`` are taken, rows with missing
    values are dropped and only ``selected_metrics`` are kept.  With
    ``norm == 'Znorm'`` every column is z-scored.  The linkage is always
    Ward/euclidean computed on that table.  While
    ``app._prev_cluster_clicks`` equals 0 the color threshold is ``0.0``;
    otherwise it is ``None``.

    Args:
        num_clusters: Not used by this function.
        selectedData: Not used by this function.
        selected_metrics: Column selection applied to the visible rows.
        norm: Normalisation mode; only ``'Znorm'`` has an effect.

    Returns:
        The plotly dendrogram figure, 600 px high, with x tick labels hidden.
    """
    previous_clicks = app._prev_cluster_clicks

    visible_rows = app._df.loc[app._df.visible].dropna()
    data = visible_rows[selected_metrics]
    if norm == 'Znorm':
        data = data.apply(zscore)

    color_thresh = 0.0 if previous_clicks == 0 else None

    def ward_linkage(_):
        # The argument supplied by the factory is ignored; the linkage is
        # computed from the metric table itself.
        return shc.linkage(data, 'ward', metric='euclidean')

    dendro = ff.create_dendrogram(
        data, linkagefun=ward_linkage, color_threshold=color_thresh)

    layout_changes = {
        'height': 600,
        'xaxis': {
            'automargin': True,
            'showticklabels': False
        }
    }
    dendro['layout'].update(layout_changes)
    return dendro
def draw_dendogram(data, title, labels, format):
    """Draw a left-oriented single-linkage dendrogram and save it.

    The output file is named ``<title>_dendogram``.  With ``format == 'pdf'``
    the figure is written as an image via ``pio.write_image``; any other
    value produces an HTML file through ``offline.plot``.

    Args:
        data: Observations to cluster.
        title: Figure title and prefix of the output file name.
        labels: Leaf labels.
        format: Output format selector; only ``'pdf'`` is special-cased.
    """

    def single_linkage(_):
        # The argument supplied by the factory is ignored; linkage is
        # requested from ``cx`` for the raw data.
        return cx.get_linkage(data, 'single', 'euclidean')

    figure = ff.create_dendrogram(
        data,
        orientation='left',
        labels=labels,
        linkagefun=single_linkage)

    layout = figure['layout']
    layout['title'] = title

    # No tick marks, larger tick font, and no axis line on the y axis.
    layout['xaxis'].update({'ticks': '', 'tickfont': dict(size=24)})
    layout['yaxis'].update({
        'ticks': '',
        'tickfont': dict(size=24),
        'showline': False
    })

    margins = {'b': 40, 'l': 300, 'r': 15, 't': 40, 'pad': 5}
    layout['margin'].update(margins)

    layout.update({'autosize': False, 'width': 1200, 'height': 1500})

    fname = title + '_dendogram'
    if format != 'pdf':
        offline.plot(figure, filename=fname + '.html')
        return
    pio.write_image(figure, fname + '.' + format)
def plot(self):
    """Render a dendrogram of the per-cluster mean vectors.

    For every cluster in ``self.DS.clusters[self.DS.levels]`` the mean over
    axis 0 is taken; the means form the rows that are clustered.  If the
    dendrogram cannot be built, an HTML fragment telling the user that only
    one cluster was found is returned instead of a plot.

    Side effects:
        ``self.shortname`` is prefixed with ``self.DS.shortclustname`` on
        every call.

    Returns:
        Whatever ``self.makeplot`` returns for the figure, or the fallback
        HTML string when figure creation fails.
    """
    title = self.titlestring % (self.DS.name, self.DS.clustname,
                                self.DS.levels)
    self.shortname = self.DS.shortclustname + self.shortname

    means = [np.average(c, axis=0) for c in self.DS.clusters[self.DS.levels]]
    X = np.column_stack(means).T

    try:
        fig = ff.create_dendrogram(X)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.  The closing tag of the <h3> heading is fixed as well.
        # NOTE(review): the outer "row" <div> is never closed here -- confirm
        # whether the embedding template supplies the closing tag.
        return '''
            <div class="row" style="margin-top:20%">
                <div class="col-md-4 offset-md-4 text-center">
                    <h1><b>Only one cluster found.</b></h1>
                    <h3>Perhaps try another algorithm?</h3>
                </div>
            '''

    # The figure title is only set outside "div" mode.
    if self.plot_mode != "div":
        fig["layout"]["title"] = title
    fig["layout"]["xaxis"]["title"] = "Cluster Labels"
    fig["layout"]["yaxis"]["title"] = "Cluster Mean Distances"

    return self.makeplot(fig, "agg/" + self.shortname)
def plotly_dendrogram(df: pd.DataFrame,
                      labels=None,
                      orientation='left',
                      color_threshold=1,
                      height=None,
                      width=None,
                      max_label_lenght=None):
    """Create a plotly dendrogram for ``df`` with readable default sizing.

    Args:
        df: Table whose rows are clustered.
        labels: Leaf labels; defaults to ``df.index``.
        orientation: Orientation forwarded to ``ff.create_dendrogram``.
        color_threshold: Color threshold forwarded to the factory.
        height: Figure height; defaults to ``max(500, 10 * len(df))``.
        width: Figure width; passed through unchanged (may be ``None``).
        max_label_lenght: When given, every label is truncated to this many
            characters.  (The misspelled name is kept for compatibility.)

    Returns:
        The configured plotly figure.
    """
    # The annotation previously read ``pd.DataFrame()``, which built a
    # DataFrame instance at definition time instead of naming the type.
    if labels is None:
        labels = df.index
    if max_label_lenght is not None:
        labels = [label[:max_label_lenght] for label in labels]
    if height is None:
        height = max(500, 10 * len(df))

    fig = ff.create_dendrogram(df,
                               color_threshold=color_threshold,
                               labels=labels,
                               orientation=orientation)
    fig.update_layout(width=width, height=height, font_family="Monospace")
    fig.update_layout(xaxis_showgrid=True, yaxis_showgrid=True)
    fig.update_yaxes(automargin=True)
    fig.update_xaxes(automargin=True)
    return fig
def hierarchical():
    """Render the hierarchical-clustering page from ``data/cluster.csv``.

    The ``connections`` column supplies the leaf labels; all remaining
    columns are clustered with Ward/euclidean linkage.  The label list is
    printed, and the figure is serialised with plotly's JSON encoder and
    handed to the ``hierarchical.html`` template as ``graphJSON``.

    Returns:
        The result of ``render_template``.
    """
    df = pd.read_csv("data/cluster.csv")
    connection_labels = list(df['connections'])
    X = df.drop(['connections'], axis=1)

    fig = ff.create_dendrogram(
        X,
        orientation='bottom',
        labels=connection_labels,
        # The argument supplied by the factory is ignored; linkage is
        # computed from the feature table itself.
        linkagefun=lambda x: linkage(X, 'ward', metric='euclidean'))

    # Was a Python 2 ``print`` statement (a SyntaxError on Python 3); the
    # call form with a single argument prints the same text on both.
    print(connection_labels)

    fig['layout'].update({
        'width': 1200,
        'height': 650,
        'title': 'Hierarchical Clustering',
        'margin': {'b': 250},
    })
    graphJSON = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
    return render_template('hierarchical.html', graphJSON=graphJSON)
def generate_dendro(words):
    """Build an 800x500 dendrogram of ``words`` from pairwise distances.

    For each word, ``word_vectors.distances(word, words)`` supplies one row
    of the matrix that is clustered.

    Args:
        words: Sequence of words; also used as leaf labels.

    Returns:
        The plotly figure, or ``None`` when a ``KeyError`` is raised while
        building it (presumably for a word missing from ``word_vectors`` --
        confirm).
    """
    try:
        distance_rows = [word_vectors.distances(word, words) for word in words]
        figure = ff.create_dendrogram(np.array(distance_rows), labels=words)
        figure['layout'].update({'width': 800, 'height': 500})
    except KeyError:
        return None
    return figure
def get_dendrogram_graph(self, document_list):
    """Plot a left-oriented dendrogram of the documents' cosine distances.

    The TF-IDF matrix of ``document_list`` is converted to
    ``1 - cosine_similarity`` and clustered.  Leaves are labelled with the
    documents' positions (``"0"``, ``"1"``, ...).  The figure is 1000x1500,
    carries ``self.watermark_image`` as layout images and a fixed title.

    Args:
        document_list: Documents passed to the private TF-IDF helper.

    Returns:
        The result of ``iplot``.
    """
    tfidf = self.__get_tfidf_matrix(document_list)
    distances = 1 - cosine_similarity(tfidf)
    leaf_names = list(map(str, range(len(distances))))

    fig = ff.create_dendrogram(
        distances, labels=leaf_names, orientation='left')

    layout = fig['layout']
    layout.update({'width': 1000, 'height': 1500})
    layout["images"] = self.watermark_image
    layout["title"] = "DENDOGRAM GRAPH"

    return iplot(fig, filename='dendrogram_with_labels')
def dendrogram(df,
               title='Dendrogram',
               out_path=None,
               layout_kwargs=None,
               to_image=False):
    """Create a Ward/euclidean dendrogram of ``df`` and hand it to ``generate_plot``.

    Args:
        df: Observations to cluster.
        title: Figure title; also forwarded as ``out_filename``.
        out_path: Forwarded to ``generate_plot``.
        layout_kwargs: Optional mapping of extra ``update_layout`` keyword
            arguments.  It is copied, never modified; ``title`` always
            overrides a ``'title'`` entry in it.
        to_image: Forwarded to ``generate_plot``.
    """
    # The previous ``layout_kwargs={}`` default was mutated on every call, so
    # the title leaked into the shared default and into callers' dicts.
    layout_options = dict(layout_kwargs) if layout_kwargs else {}
    layout_options['title'] = title

    fig = ff.create_dendrogram(
        df,
        # The argument supplied by the factory is ignored; linkage is
        # computed from ``df`` itself.
        linkagefun=lambda x: linkage(df, method='ward', metric='euclidean'))
    fig.update_layout(**layout_options)
    generate_plot(fig, out_path=out_path, out_filename=title,
                  to_image=to_image)
def sort_row_data(ar, label):
    """Reorder rows and their labels, by clustering or lexicographically.

    With ``CLUSTERING_MODE`` enabled the rows are ordered like the leaves of
    a right-oriented dendrogram; the tick texts of that figure are parsed as
    integer row positions.  Otherwise rows are sorted by the concatenation
    of the string forms of their elements (ties fall back to comparing the
    row and then the label).

    Args:
        ar: Sequence of rows.
        label: Sequence of labels, one per row.

    Returns:
        Tuple ``(rows, labels)`` in the new order.
    """
    if not CLUSTERING_MODE:
        keyed_rows = sorted(
            ("".join(str(e) for e in ar[i]), ar[i], label[i])
            for i in range(len(label))
        )
        sorted_rows = [row for _, row, _ in keyed_rows]
        sorted_labels = [name for _, _, name in keyed_rows]
        return sorted_rows, sorted_labels

    tree = ff.create_dendrogram(np.array(ar), orientation='right')
    leaf_order = [int(text) for text in tree['layout']['yaxis']['ticktext']]
    return [ar[i] for i in leaf_order], [label[i] for i in leaf_order]
def subplot_dendogram(self, l1_df, l2_df, l3_df, l1_label, l1_color):
    """Build the level-1 cluster dendrogram with one marker per leaf.

    With more than one label a right-oriented dendrogram of ``l1_df`` is
    created; otherwise an empty figure with a single marker is used.  A
    scatter trace then places one marker per leaf at ``x = 0.1``; leaves
    whose label appears in ``l1_color`` are drawn in ``#F1C40F``, all others
    in ``#AED6F1``.

    Args:
        l1_df: Data frame for the first-level clustering.
        l2_df: Data frame for the second level (currently unused).
        l3_df: Data frame for the third level (currently unused).
        l1_label: Labels of the first-level clusters.
        l1_color: Optional iterable of labels to highlight, or ``None``.

    Returns:
        The plotly figure.
    """
    # A ``make_subplots`` figure used to be created here and was immediately
    # overwritten by one of the branches below, so it has been dropped.
    if len(l1_label) > 1:
        fig = ff.create_dendrogram(l1_df.to_numpy(),
                                   orientation='right',
                                   labels=l1_label)
        y = list(fig['layout']['yaxis']['tickvals'])
        new_labels = list(fig['layout']['yaxis']['ticktext'])
    else:
        fig = go.Figure()
        new_labels = list(l1_label)
        y = [1]

    x = np.full(len(y), .1)
    size = np.full(len(y), 10)
    color = np.full(len(y), '#AED6F1')

    # Highlight the sub-clusters named in ``l1_color``.
    inds = []
    # ``is not None`` instead of ``== None``: the equality form is evaluated
    # element-wise for array-like input and then fails in a boolean context.
    if l1_color is not None:
        try:
            for label in l1_color:
                inds.append(new_labels.index(label))
        except Exception:
            # Best effort, as before: the first label that cannot be looked
            # up ends the search; labels found so far stay highlighted.
            pass
    color[inds] = '#F1C40F'

    fig.add_trace(
        go.Scatter(mode='markers',
                   x=x,
                   y=y,
                   text=new_labels,
                   hoverinfo='text',
                   marker=dict(size=size, color=color)))
    return fig
def update_dendrogram(hc_data):
    """Return a bottom-oriented Ward dendrogram for ``hc_data['linkage']``.

    Args:
        hc_data: Mapping with a ``'linkage'`` entry convertible to an array.
            An empty dict yields an empty figure.

    Returns:
        A plotly figure.
    """
    if hc_data == {}:
        return go.Figure()

    matrix = np.array(hc_data['linkage'])

    def ward_linkage(_):
        # The argument supplied by the factory is ignored; linkage is
        # computed from the stored array itself.
        return linkage(matrix, 'ward', metric='euclidean')

    return FF.create_dendrogram(
        matrix, orientation='bottom', linkagefun=ward_linkage)
def render_dendrogram(animal_list, results, outdir, outfilename,
                      threshold=0.5):
    """
    Render a dendrogram from a matrix of pair-wise distances.

    The non-empty entries of ``results`` are flattened row by row into a
    condensed distance array, clustered with Ward linkage, and written to
    ``outdir`` both as a .png image and as an .html page.  Tick labels are
    wrapped in a colored ``<span>`` chosen by each animal's group.

    Parameters
    ----------
    animal_list : list of Animal() objects
        Animals the pair-wise distances were calculated for.  Order is
        assumed to match the order of the results.
    results : 2D array of floats (upper-triangular, empty diagonal)
        results[i][j] is the distance between the trajectories of animal[i]
        and animal[j].  Entries equal to ``''`` are skipped.
    outdir : str
        Output directory for the files written by this function.
    outfilename : str
        Base name of the written files.  Spaces are removed from the full
        output paths.
    threshold : float
        Value at which the separation of clusters in the dendrogram is made.
    """
    html_outpath, png_outpath = (
        os.path.join(outdir, outfilename + extension).replace(' ', '')
        for extension in ('.html', '.png')
    )

    # One row holding every non-empty distance, in row-major order.
    condensed = np.array(
        [[value for row in results for value in row if value != '']])

    def first_row(wrapped):
        # Unwrap the single row so the linkage sees a flat condensed array.
        return wrapped[0]

    def ward_linkage(distances):
        return linkage(distances, 'ward')

    fig = ff.create_dendrogram(condensed,
                               labels=animal_list,
                               distfun=first_row,
                               linkagefun=ward_linkage,
                               color_threshold=threshold)

    ordered_animals = fig['layout']['xaxis']['ticktext']
    tick_positions = fig['layout']['xaxis']['tickvals']

    def colored_label(animal):
        color = COLORS[animal.get_group()]
        text = animal.get_name()
        return f"<span style='color:{str(color)}'> {str(text)} </span>"

    fig.update_layout(
        xaxis={
            'ticktext': [colored_label(animal) for animal in ordered_animals],
            'range': [0, tick_positions[0] + tick_positions[-1]]
        })

    fig.write_image(png_outpath)
    plotly.offline.plot(fig, filename=html_outpath, auto_open=False)
def update_figure(input_text):
    """Cluster the sentences of ``input_text`` and return the dendrogram.

    The text is split with ``sent_tokenize``; every sentence is embedded with
    ``sent_embeddings`` and the embeddings are clustered with complete
    linkage on cosine distance.  Leaf labels are the sentences truncated to
    130 characters, and the left margin is widened to 800 to fit them.

    Args:
        input_text: Raw text to split into sentences.

    Returns:
        The plotly figure.
    """
    sentences = sent_tokenize(input_text)
    embeddings = np.array([sent_embeddings(s) for s in sentences])
    truncated_labels = [s[:130] for s in sentences]

    def complete_cosine_linkage(_):
        # The argument supplied by the factory is ignored; linkage is
        # computed from the embeddings themselves.
        return linkage(embeddings, 'complete', metric='cosine')

    fig = ff.create_dendrogram(
        embeddings,
        orientation='left',
        labels=truncated_labels,
        linkagefun=complete_cosine_linkage)
    fig.update_layout(margin=dict(l=800))
    return fig
def clustered_data(self):
    """Return heatmap data reordered by two dendrograms.

    One dendrogram is built over ``data_array`` and one over its transpose,
    both with ``self.linkage_func``.  Their tick texts are parsed as integer
    positions and used to reorder the transposed data along both axes.  The
    x labels are ``"<label>_<position>"`` strings; the y labels come from
    ``pathways``.

    Returns:
        dict with list-valued keys ``'x'``, ``'y'`` and ``'z'``.
    """
    data_array, labels, pathways = self._map_to_data_array()

    x_tree = ff.create_dendrogram(
        data_array, orientation='bottom', linkagefun=self.linkage_func)
    y_tree = ff.create_dendrogram(
        data_array.T, orientation='right', linkagefun=self.linkage_func)

    x_order = [int(text) for text in x_tree['layout']['xaxis']['ticktext']]
    y_order = [int(text) for text in y_tree['layout']['yaxis']['ticktext']]

    # Rows first, then columns.
    heat = data_array.T[y_order, :][:, x_order]

    tagged_labels = np.array(
        ['%s_%d' % (name, position) for position, name in enumerate(labels)])

    return {
        'x': tagged_labels[x_order].tolist(),
        'y': pathways[y_order].tolist(),
        'z': heat.tolist(),
    }
def load_dendogram_cluster(self,
                           med_df,
                           orig_labels,
                           height=900,
                           color_labels=None):
    """Build a cluster dendrogram with one selectable marker per leaf.

    With more than one label a right-oriented dendrogram of ``med_df`` is
    created; otherwise an empty figure with a single marker is used.  A
    scatter trace places one marker per leaf at ``x = 0.1``; leaves whose
    label appears in ``color_labels`` are drawn in ``#F1C40F``, all others in
    ``#AED6F1``.

    Args:
        med_df: Data frame with one row per cluster.
        orig_labels: Cluster labels.
        height: Figure height.
        color_labels: Optional iterable of labels to highlight, or ``None``.

    Returns:
        The plotly figure, with the y axis on the right, 5 px left and right
        margins and ``clickmode='event+select'``.
    """
    if len(orig_labels) > 1:
        fig = ff.create_dendrogram(med_df.to_numpy(),
                                   orientation='right',
                                   labels=orig_labels)
        y = list(fig['layout']['yaxis']['tickvals'])
        new_labels = list(fig['layout']['yaxis']['ticktext'])
    else:
        fig = go.Figure()
        new_labels = list(orig_labels)
        y = [1]

    x = np.full(len(y), .1)
    size = np.full(len(y), 10)
    color = np.full(len(y), '#AED6F1')

    # Highlight the sub-clusters named in ``color_labels``.
    inds = []
    # ``is not None`` instead of ``== None``: the equality form is evaluated
    # element-wise for array-like input and then fails in a boolean context.
    if color_labels is not None:
        try:
            for label in color_labels:
                inds.append(new_labels.index(label))
        except Exception:
            # Best effort, as before: the first label that cannot be looked
            # up ends the search; labels found so far stay highlighted.
            pass
    color[inds] = '#F1C40F'

    fig.add_trace(
        go.Scatter(mode='markers',
                   x=x,
                   y=y,
                   text=new_labels,
                   hoverinfo='text',
                   marker=dict(size=size, color=color)))

    fig.update_layout(height=height, clickmode='event+select')
    fig['layout']['yaxis']['side'] = 'right'
    fig['layout']['margin']['r'] = 5
    fig['layout']['margin']['l'] = 5
    return fig
def heatmap_by_clustering(table_correlations,
                          hovertemplate,
                          customdata,
                          zmin=-1,
                          zmax=1):
    """Draw a correlation heatmap ordered by a dendrogram shown above it.

    Missing correlations are replaced by 0 for clustering, and the distance
    function is ``1 - value``.  The dendrogram occupies the top 15 % of the
    figure (axis ``y2``); the heatmap below uses the same leaf order on both
    axes.

    Args:
        table_correlations: Square table indexed and labelled by the same
            names (accessed with ``.columns`` and ``.loc[labels, labels]``).
        hovertemplate: Hover template forwarded to the heatmap.
        customdata: Optional table reordered like the correlations and
            attached to the heatmap, or ``None``.
        zmin: Lower bound of the color scale.
        zmax: Upper bound of the color scale.

    Returns:
        The 1100x1100 plotly figure.
    """
    fig = create_dendrogram(
        table_correlations.replace(np.nan, 0),
        orientation="bottom",
        distfun=lambda df: 1 - df,
    )
    for trace in fig["data"]:
        trace["yaxis"] = "y2"

    # Tick texts are parsed as integer column positions.
    leaf_positions = [int(text) for text in fig["layout"]["xaxis"]["ticktext"]]
    labels = table_correlations.columns[leaf_positions]

    fig.update_layout(
        xaxis={"ticktext": labels, "mirror": False},
        yaxis2={
            "domain": [0.85, 1],
            "showticklabels": False,
            "showgrid": False,
            "zeroline": False,
        },
    )

    ordered_correlations = table_correlations.loc[labels, labels].values
    ordered_customdata = (
        customdata.loc[labels, labels].values if customdata is not None else None
    )

    tick_positions = fig["layout"]["xaxis"]["tickvals"]
    heatmap = go.Heatmap(
        x=tick_positions,
        y=tick_positions,
        z=ordered_correlations,
        colorscale=BLUE_WHITE_RED,
        customdata=ordered_customdata,
        hovertemplate=hovertemplate,
        zmin=zmin,
        zmax=zmax,
    )

    fig.update_layout(
        yaxis={
            "domain": [0, 0.85],
            "mirror": False,
            "showgrid": False,
            "zeroline": False,
            "ticktext": labels,
            "tickvals": tick_positions,
            "showticklabels": True,
            "ticks": "outside",
            "tickfont": {"size": 15},
        },
        xaxis={"tickfont": {"size": 15}},
    )
    fig.add_trace(heatmap)

    fig["layout"]["width"] = 1100
    fig["layout"]["height"] = 1100
    return fig
def draw_dendrogram(X):
    """Return an HTML ``div`` string containing an 800x500 dendrogram of ``X``.

    The div is produced by ``plotly.offline.plot`` without the plotly.js
    bundle and without the "export" link.

    Args:
        X: Observations to cluster.

    Returns:
        The value returned by ``plotly.offline.plot`` with
        ``output_type='div'``.
    """
    figure = ff.create_dendrogram(X)
    figure['layout'].update({'width': 800, 'height': 500})

    plot_options = {
        'filename': 'dendrogram',
        'output_type': 'div',
        'include_plotlyjs': False,
        'show_link': False,
    }
    return plotly.offline.plot(figure, **plot_options)
def main():
    """Show a dendrogram of the CSV file named by the first CLI argument.

    The first line of the file is a comma-separated list of flags, one per
    column; columns flagged ``'0'`` are dropped.  The remaining lines are the
    data rows (read without a header).  The 800x500 figure is opened with
    ``fig.show()``.
    """
    fName = sys.argv[1]

    # The header used to be read from a handle that was never closed; the
    # context manager releases it as soon as the flag line has been read.
    with open(fName, "r") as header_file:
        colsToUse = header_file.readline().strip("\n").split(",")

    df = pd.read_csv(fName, header=None, skiprows=1)

    colsToDrop = [i for i, flag in enumerate(colsToUse) if flag == '0']
    df.drop(df.columns[colsToDrop], axis=1, inplace=True)

    fig = ff.create_dendrogram(df)
    fig.update_layout(width=800, height=500)
    fig.show()
def hiearchical_clustering(df_dendogram):
    """Pick the linkage method with the best cophenetic correlation and plot it.

    All columns from the third onward are standardised.  For each of seven
    linkage methods the cophenetic correlation against the pairwise
    distances is computed; the first method reaching the maximum is used for
    the dendrogram, whose leaves are labelled with the ``Squad`` column.

    Args:
        df_dendogram: Table with a ``Squad`` column and feature columns from
            position 2 on.

    Returns:
        Tuple ``(fig, results)``: the 800x500 plotly figure and a DataFrame
        listing every method with its cophenetic correlation, best first.
    """
    import plotly.figure_factory as ff
    from sklearn.preprocessing import StandardScaler
    from scipy.cluster.hierarchy import dendrogram, linkage, cophenet
    from scipy.spatial.distance import pdist
    import scipy.cluster.hierarchy as sch

    methods = {
        'single': 'Nearest Point Algorithm (single)',
        'complete': 'Farthest Point Algorithm (complete)',
        'ward': 'Incremental Algorithm (ward)',
        'average': 'UPGMA Algorithm (average)',
        'weighted': 'WPGMA Algorithm (weighted)',
        'median': 'WPGMC Algorithm (median)',
        'centroid': 'UPGMC Algorithm (centroid)'
    }

    features = StandardScaler().fit_transform(df_dendogram.iloc[:, 2:])
    pairwise = pdist(features)

    # cophenet returns (correlation, cophenetic distances); keep the first.
    coph_scores = [
        cophenet(linkage(features, method), pairwise)[0] for method in methods
    ]

    results = pd.DataFrame(
        zip(methods.values(), coph_scores),
        columns=['Algorithm and Distance Method', 'Cophenetic Correlation'])
    results = results.sort_values(
        by='Cophenetic Correlation', ascending=False).reset_index(drop=True)

    top_score = max(coph_scores)
    best = [
        method for method, score in zip(methods, coph_scores)
        if score == top_score
    ]

    names = list(df_dendogram.Squad)
    fig = ff.create_dendrogram(
        features,
        orientation='bottom',
        labels=names,
        linkagefun=lambda x: sch.linkage(x, best[0]))

    # The title drops the trailing "(method)" token of the description.
    description_words = methods[best[0]].split(" ")[:-1]
    fig.update_layout(
        width=800,
        height=500,
        title='Dendogram using ' + " ".join(description_words))
    return fig, results
def _get_dendrogram_fig(self) -> Figure:
    """Build the plotly dendrogram for the document-term matrix.

    Leaves are labelled through ``self._id_temp_label_map``, keyed by the
    matrix index values.  Orientation, distance metric and linkage method
    come from ``self._dendro_option``.

    :return: A plotly figure object
    """

    def pairwise_distance(matrix):
        return pdist(matrix, metric=self._dendro_option.dist_metric)

    def build_linkage(dist):
        return linkage(dist, method=self._dendro_option.linkage_method)

    file_ids = self._doc_term_matrix.index.values
    leaf_labels = [self._id_temp_label_map[file_id] for file_id in file_ids]

    return ff.create_dendrogram(
        self._doc_term_matrix,
        orientation=self._dendro_option.orientation,
        distfun=pairwise_distance,
        linkagefun=build_linkage,
        labels=leaf_labels)
def make_dendogram_plot(name, rep_set, loci_names, loci_dict, filename):
    """Save a dendrogram of samples clustered by genotype agreement.

    For every ordered pair of samples the fraction of loci with identical
    ``'genotype'`` entries is computed; the resulting square matrix is
    clustered and the 800x500 left-oriented figure is saved through
    ``py.image.save_as``.

    Args:
        name: Not used by this function.
        rep_set: Sample identifiers; also the leaf labels.
        loci_names: Loci compared for every pair of samples.
        loci_dict: Nested mapping ``sample -> locus -> {'genotype': ...}``.
        filename: Output file name forwarded to ``py.image.save_as``.
    """

    def shared_fraction(first, second):
        # 1 per locus where both samples carry the same genotype, else 0.
        matches = [
            1 if (loci_dict[first][locus]['genotype'] ==
                  loci_dict[second][locus]['genotype']) else 0
            for locus in loci_names
        ]
        return sum(matches) / len(matches)

    agreement = numpy.array([
        [shared_fraction(row_sample, column_sample)
         for column_sample in rep_set]
        for row_sample in rep_set
    ])

    dendro = ff.create_dendrogram(
        agreement, orientation='left', labels=rep_set)
    dendro['layout'].update({'width': 800, 'height': 500})
    py.image.save_as(dendro, filename=filename)
def main():
    """Compute all score arrays and build the charts.

    Three images are written under ``images/``: the scores of every variant
    of the taxonometric method, the profiles selected by each variant next
    to the reference profile, and a dendrogram of the companies together
    with the reference.
    """
    names = ['alpha', 'beta', 'gamma', 'delta']
    companies = array([[67, 57, 49, 81, 63], [73, 59, 41, 87, 59],
                       [65, 57, 43, 77, 63], [67, 55, 87, 73, 63]])
    z = scale(companies)
    weights = linspace(1, 5, 5)

    unweighted = score(z)
    non_normalized = score(z, weights)
    normalized = score(z, weights / sum(weights))
    variants = (
        ('Незважені', unweighted),
        ('Зважені ненормалізовані', non_normalized),
        ('Зважені нормалізовані', normalized),
    )
    margins = {'t': 20, 'r': 20, 'b': 20, 'l': 20}

    # Chart of the scores for each variant of the taxonometric method.
    figure = Figure()
    for trace_name, scores in variants:
        figure.add_trace(Bar(name=trace_name, x=names, y=scores))
    figure.update_layout(margin=dict(margins))
    figure.write_image('images/scores.png', width=1200, height=600)

    features = ['досвід', 'фінанси', 'іновації', 'динаміка', 'стабільність']
    standard = max(companies, 0)

    # Chart of the profiles chosen by the taxonometric method and of the
    # reference profile.
    figure = Figure()
    for trace_name, scores in variants:
        figure.add_trace(
            Bar(name=trace_name, x=features, y=companies[argmin(scores)]))
    figure.add_trace(Bar(name='Еталон', x=features, y=standard))
    figure.update_layout(margin=dict(margins))
    figure.write_image('images/profiles.png', width=1200, height=600)

    # Dendrogram relative to the reference solution.
    figure = create_dendrogram(
        append(companies, [standard], 0),
        orientation='left',
        labels=names + ['standard'])
    figure.update_layout(margin=dict(margins))
    figure.write_image('images/dendrogram.png', width=1200, height=600)
def _get_dendrogram_fig(self) -> Figure:
    """Build the plotly dendrogram for the document-term matrix.

    Leaves are labelled through ``self._id_temp_label_map``, keyed by the
    matrix index values.  Orientation, distance metric and linkage method
    come from ``self._dendro_option``.

    :return: A plotly figure object
    """

    def pairwise_distance(matrix):
        return pdist(matrix, metric=self._dendro_option.dist_metric)

    def build_linkage(dist):
        return linkage(dist, method=self._dendro_option.linkage_method)

    file_ids = self._doc_term_matrix.index.values
    leaf_labels = [self._id_temp_label_map[file_id] for file_id in file_ids]

    return ff.create_dendrogram(
        self._doc_term_matrix,
        orientation=self._dendro_option.orientation,
        distfun=pairwise_distance,
        linkagefun=build_linkage,
        labels=leaf_labels)
def plot_dendro(sfam, method='ssap', show='jup'):
    '''
    Plot a dendrogram from the distance matrix (SSAP/overlap) of ``sfam``.

    ``method`` must be ``'ssap'`` or ``'over'``; any other value prints
    "Unknown method" and returns without plotting.  ``show`` selects the
    output: ``'jup'`` uses ``plotly.offline.iplot``, ``'html'`` uses
    ``plotly.offline.plot``; any other value prints "Unknown show method"
    after the figure has been built.
    '''
    if method == 'ssap':
        directory = './distance_matrices/'
    elif method == 'over':
        directory = './percent_overlap/'
    else:
        print('Unknown method')
        return

    matrix = fetch_sfam_matrix(sfam, method=method)
    table = exclude_missing_data(matrix)

    # Remaining gaps are filled with 0 before clustering.
    dendro = ff.create_dendrogram(table.fillna(0), labels=table.index)
    dendro['layout'].update({'width': 800, 'height': 500})

    if show == 'jup':
        plotly.offline.iplot(dendro, filename='simple_dendrogram')
        return
    if show == 'html':
        plotly.offline.plot(dendro, filename='simple_dendrogram')
        return
    print("Unknown show method")
def hierarchical_plotly(df):
    """Plot a dendrogram of regions clustered by their review vocabulary.

    All review texts of a region are joined into one document; the
    documents are turned into a bag-of-words matrix and compared by cosine
    distance.  A silhouette score of a three-cluster Ward clustering is
    shown in the figure title.

    Args:
        df: Table with a ``region`` column and a ``text`` column of strings.
            NOTE(review): the three-cluster silhouette step presumably needs
            more than a handful of regions -- confirm the minimum with
            scikit-learn.
    """
    # One document per region.  The loop used to run over ``range(9)``, which
    # failed with fewer than nine regions and left the labels out of sync
    # with the distance matrix when there were more.
    region = df['region'].unique()
    text_array = [
        ' '.join(np.array(df.loc[df.region == reg, ['text']]).ravel())
        for reg in region
    ]

    txt = np.array(text_array)
    vect = CountVectorizer()
    bag = vect.fit_transform(txt)

    S = 1 - cosine_similarity(bag)
    lnk = ward(S)

    # Evaluate the quality of the Ward clustering with a silhouette score.
    ward_label = AgglomerativeClustering(n_clusters=3, linkage='ward')
    w_label = ward_label.fit_predict(lnk)
    h_silhouette_score = silhouette_score(lnk, w_label)

    fig = ff.create_dendrogram(
        S,
        orientation='left',
        labels=region,
        # The argument supplied by the factory is ignored; linkage is
        # computed from the distance matrix itself.
        linkagefun=lambda x: linkage(S, 'ward', metric='euclidean'))

    fig['layout'].update(
        width=800,
        height=600,
        title=
        'Hierarchical Clustering Dendrogram of Reviews by Region (sihouette score = '
        + str(round(h_silhouette_score, 2)) + ')')
    fig['layout'].update(xaxis=dict(title='cosine similarity distance'))
    py.plot(fig, filename='dendrogram_with_labels')
def plot_dendrogram(linkage_matrix, labels=None):
    """Draw a minimal, single-color dendrogram from a ready-made linkage.

    Both the distance and the linkage callbacks return their input
    unchanged, so ``linkage_matrix`` is used as the linkage directly.  The x
    axis is hidden and fixed to ``[-0.05, 1.05]``; the y axis is hidden too
    when no labels are given.

    Args:
        linkage_matrix: Linkage passed straight through to the factory.
        labels: Optional leaf labels.

    Returns:
        The styled plotly figure.
    """

    def passthrough(value):
        return value

    fig = create_dendrogram(
        linkage_matrix,
        labels=labels,
        distfun=passthrough,
        linkagefun=passthrough,
        orientation="left",
        color_threshold=-0.1,
        colorscale=["#46bac2"],
    )

    if labels is None:
        fig.update_yaxes(visible=False, showticklabels=False)
    fig.update_xaxes(range=[-0.05, 1.05], visible=False, showticklabels=False)

    # Both axes share the same bare styling.
    bare_axis = {
        "mirror": False,
        "showgrid": False,
        "showline": False,
        "zeroline": False,
        "ticks": "",
        "fixedrange": True,
    }
    fig.update_layout(
        xaxis=dict(bare_axis),
        yaxis=dict(bare_axis),
        font={"color": "#371ea3"},
        template="none",
        margin={"t": 78, "b": 71, "r": 30},
    )
    return fig
def create_dendrogram(*args, **kwargs):
    """Forward to ``plotly.figure_factory.create_dendrogram``.

    ``FigureFactory._deprecated`` is called first with this function's name
    (presumably to report the deprecation -- confirm in that helper); all
    positional and keyword arguments are then passed through unchanged.
    """
    FigureFactory._deprecated('create_dendrogram')
    # Imported at call time, under a private alias so the local name does not
    # shadow this function's own name.
    from plotly.figure_factory import create_dendrogram as _factory
    return _factory(*args, **kwargs)