def __create_neighbor_df(self, query):
    """Build a Data Frame of nearest-neighbor terms for each query term.

    One row is produced per individual query term, plus a final row
    giving the neighbors of the combined query, with columns
    'Query Term' and 'Top N Neighbor Terms'.
    """
    # NOTE: original computed top_label/columns twice; compute once and
    # drop the unused 'columns' variable
    num_neighbors = config.get("num_neighbors", 10)
    top_label = "Top %d Neighbor Terms" % num_neighbors
    rows = []
    # generate the individual recommendations
    for query_term in query:
        neighbors = self.embed.get_neighbors(query_term, num_neighbors=num_neighbors)
        rows.append({"Query Term": query_term, top_label: ", ".join(neighbors)})
    # generate the overall combination for the full query
    neighbors = self.embed.get_neighbors(query, num_neighbors=num_neighbors)
    rows.append({"Query Term": "Combined Query", top_label: ", ".join(neighbors)})
    return pd.DataFrame(rows)
def generate_termlevel_card_text(self):
    """Generate the explanatory Markdown text for the term-level silhouette card."""
    text = "Individual term-level silhouette scores for the top %d terms in the descriptor of the topic selected below." % (config.get("top_terms", 10))
    # fix: comma (not a full stop) before the subordinate "where ..." clause
    text += " Terms are arranged in descending order by score,"
    text += " where a score close to 1 indicates a term that is semantically coherent with respect to its topic,"
    text += " while a score close to -1 indicates a term that does not fit well with its topic."
    return dcc.Markdown(text)
def generate_neighbor_card_text(self):
    """Produce the Markdown description shown on the embedding neighbor card."""
    k = config.get("num_neighbors", 10)
    meta = self.metadata
    parts = [
        "The word embedding *%s* was generated by the *%s* algorithm on the %s, " % (meta["id"], meta["algorithm"]["id"], meta["description"]),
        " where each term is represented by a vector of %d dimensions." % (meta["dimensions"]),
        "\n\nEnter one or more query terms and hit return to show a list of the %d most similar neighbor terms for each query term, based on the similarities in the word embedding." % k,
        " The final row of the table shows the %d most similar neighbors for the combined set of query terms when considered together." % k,
    ]
    return dcc.Markdown("".join(parts))
def generate_vtable(self):
    """ Generates a Dash table containing topic-level validation scores. """
    if self.current_embed_id is None:
        return ""
    # already cached these results?
    if self.current_embed_id in self.validation_cache:
        df = self.validation_cache[self.current_embed_id]
        log.info("Using cached comparison validation scores for embedding %s" % self.current_embed_id)
    else:
        # get the word embedding
        embed = self.webcore.get_embedding(self.current_embed_id)
        if embed is None:
            return ""
        # perform the evaluation
        log.info("Performing comparison on %d topic models using %s ..." % (len(self.all_metadata), self.current_embed_id))
        df = self.validator.get_validation_df(self.all_metadata, embed)
        if df is None:
            return ""
        # round it before caching
        df = df.round(config.get("precision", 3))
        self.validation_cache[self.current_embed_id] = df
    # NOTE: both branches above guarantee a non-None df at this point, so the
    # previous duplicate None check has been removed
    data = df.to_dict('records')
    # use short display names for measure columns where one is defined
    columns = [{"name": measure_short_names.get(i, i), "id": i, "deletable": False, "selectable": False}
               for i in df.columns]
    return dash_table.DataTable(
        id='validation_model',
        columns=columns,
        data=data,
        sort_action='native',
        style_header={ 'backgroundColor': 'white', 'fontWeight': 'bold', 'border-top': '1px solid #dee2e6', 'border-bottom': '2px solid #dee2e6', 'line-height': 3.1 },
        style_cell={ 'textAlign': 'right', 'border-top': '1px solid #dee2e6', 'line-height': 3.1 },
        style_cell_conditional=[{ 'if': {'column_id': c}, 'textAlign': 'left' } for c in ['Name', 'Corpus']],
        style_as_list_view=True)
def __apply_mds(self, D):
    """
    Applies Multidimensional scaling (MDS) to the specified distance
    matrix and returns the resulting 2D coordinates.
    """
    # MDS fitting can emit convergence warnings; suppress them for a clean log
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = manifold.MDS(n_components=2,
                             random_state=config.get("random_seed", 100),
                             dissimilarity="euclidean")
        fitted = model.fit(D)
    # only the projected coordinates are of interest
    return fitted.embedding_
def generate_termlevel_chart(self):
    """Generate a horizontal bar chart of per-term silhouette scores for the
    currently-selected topic, using the currently-selected embedding."""
    if self.current_embed_id is None:
        return ""
    # already cached these results?
    if self.current_embed_id in self.termlevel_cache:
        scores = self.termlevel_cache[self.current_embed_id]
        log.info("Using cached term-level silhouette scores for embedding %s" % self.current_embed_id)
    else:
        # get the word embedding
        embed = self.webcore.get_embedding(self.current_embed_id)
        if embed is None:
            return ""
        log.info("Applying term-level silhouette analysis to topic model using %s ..." % self.current_embed_id)
        scores = self.validator.get_termlevel_silhouette_scores(self.metadata, embed)
        if scores is None:
            return ""
        self.termlevel_cache[self.current_embed_id] = scores
    # create the values for the chart, based on the currently selected topic
    term_scores = pd.Series(scores[self.current_topic_index - 1]).sort_values(ascending=True)
    xvalues, yvalues = [], []
    # fix: Series.iteritems() was removed in pandas 2.0; items() is the
    # supported equivalent on all pandas versions
    for term, score in term_scores.items():
        xvalues.append(round(score, config.get("precision", 3)))
        yvalues.append(term)
    # silhouette scores always lie in [-1, 1]
    min_value, max_value = -1, 1
    # get the color from the palette for the current topic
    colors = self.get_colors(self.metadata["k"])
    s_rgb = self.format_color_string(colors[self.current_topic_index - 1])
    # generate the chart
    return dcc.Graph(
        id='chart_topicsil',
        figure={
            'data': [
                {
                    'x': xvalues, 'y': yvalues, 'type': 'bar', 'orientation': 'h',
                    'marker': { 'color': s_rgb, 'opacity': 0.4 },
                    'hovertemplate': '<b>%{y}</b>: %{x}<extra></extra>',
                    'hoverlabel': { 'bgcolor': 'rgb(250, 246, 208)' }
                },
            ],
            'layout': {
                'margin': { "t": 30, "l": 120, "r": 120 },
                'yaxis': { 'tickfont': { "size": 14 } },
                'xaxis': { 'title': "Term Silhouette Score", 'tickfont': { "size": 14 }, 'titlefont': { "size": 15 }, 'range': [min_value, max_value] }
            }
        })
def generate_heatmap_card(self):
    """Build the card containing the embedding term-similarity heatmap."""
    sample_query = config.get("query_sample", "")
    header = dbc.CardHeader("Embedding Term Similarity Heatmap", className="card-header")
    body = dbc.CardBody([
        html.Div(self.generate_heatmap_card_text(), className="card-text"),
        html.Div(self.generate_embed_heatmap(sample_query), id='content_embed_heatmap'),
    ])
    return dbc.Card([header, body], )
def generate_vsummary(self):
    """ Generates a Dash table containing overall model-level validation scores. """
    if self.current_embed_id is None:
        return ""
    # already cached these results?
    if self.current_embed_id in self.validation_cache:
        df = self.validation_cache[self.current_embed_id]
        log.info("Using cached validation scores for embedding %s" % self.current_embed_id)
    else:
        # get the word embedding
        embed = self.webcore.get_embedding(self.current_embed_id)
        if embed is None:
            return ""
        # perform the evaluation
        log.info("Evaluating overall topic model using %s ..." % self.current_embed_id)
        df = self.validator.get_validation_df(self.metadata, embed)
        self.validation_cache[self.current_embed_id] = df
    # the cache may hold a None result from a failed evaluation
    if df is None:
        return ""
    # generate data: mean score per measure, rounded for display
    df_mean = df.mean(axis=0)
    df_mean = df_mean.round(config.get("precision", 3))
    data = []
    columns = ["Measure", "Mean Value"]
    # fix: Series.iteritems() was removed in pandas 2.0; items() works everywhere
    for i, value in df_mean.items():
        if i not in measure_names:
            continue
        label = "%s (%s)" % (measure_names[i], measure_short_names[i])
        data.append({"Measure": label, "Mean Value": value})
    # generate the table
    return dash_table.DataTable(
        id='validation_summary',
        columns=[{"name": i, "id": i, "deletable": False, "selectable": False} for i in columns],
        data=data,
        style_cell={ 'textAlign': 'center' },
        style_header={ 'backgroundColor': 'white', 'fontWeight': 'bold', 'border-bottom': '2px solid #808080' },
        style_cell_conditional=[{ 'if': {'column_id': c}, 'textAlign': 'left' } for c in ['Measure']],
        style_as_list_view=True)
def generate_document_association_chart(self):
    """Generate a horizontal bar chart of the documents most strongly
    associated with the currently-selected topic."""
    descriptors = self.metadata.get_descriptors()
    if descriptors is None:
        return ""
    # lazily load the topic-document association matrix
    if self.document_associations is None:
        self.document_associations = self.metadata.get_document_associations()
        if self.document_associations is None:
            return ""
    # get the top documents for this topic
    weights = self.document_associations[self.current_document_topic_index].sort_values(ascending=False).head(self.top_associations)
    max_value = self.document_associations.max().max()
    # reverse the order so the strongest association plots at the top
    weights = weights.sort_values(ascending=True)
    xvalues, yvalues = [], []
    # fix: Series.iteritems() was removed in pandas 2.0; items() is equivalent
    for doc_id, score in weights.items():
        xvalues.append(round(score, config.get("precision", 3)))
        # trailing space pads the labels away from the axis
        yvalues.append(doc_id + " ")
    # get the color from the palette
    colors = self.get_colors(self.metadata["k"])
    s_rgb = self.format_color_string(colors[self.current_document_topic_index - 1])
    # generate the chart
    title = "Topic %02d: %s" % (self.current_document_topic_index, ", ".join(descriptors[self.current_document_topic_index - 1]))
    chart_height = self.get_barchart_height(len(xvalues))
    return dcc.Graph(
        id='chart_document_assoc',
        figure={
            'data': [
                {
                    'x': xvalues, 'y': yvalues, 'type': 'bar', 'orientation': 'h',
                    'marker': { 'color': s_rgb, 'opacity': 0.4 },
                    'hovertemplate': '<b>%{y}</b>: %{x}<extra></extra>',
                    'hoverlabel': { 'bgcolor': 'rgb(250, 246, 208)' }
                },
            ],
            'layout': {
                'title': { 'text': title, 'font': { "size": 15 } },
                'height': chart_height,
                'margin': { "t": 40, "l": 250, "r": 100 },
                'yaxis': { 'tickfont': { "size": 14 } },
                'xaxis': { 'title': "Topic-Document Association", 'tickfont': { "size": 13 }, 'titlefont': { "size": 15 }, 'range': [0, max_value] },
            }
        })
def __init__(self, webcore, all_model_metadata):
    """Create the comparison page layout for the given set of topic models."""
    super(ComparisonLayout, self).__init__(webcore)
    # page identity
    self.page_title = "%s - Comparison" % self.page_title
    self.page_suffix = "-comparison"
    # validation measure implementation
    self.validator = ModelValidator()
    # current state: the models being compared and the active embedding
    self.all_metadata = all_model_metadata
    self.current_embed_id = None
    self.current_metadata_indices = [0, 1]
    self.top_terms = config.get("top_terms", 10)
    # memoized validation results, keyed by embedding ID
    self.validation_cache = {}
def __calculate_similarity_df(self, terms):
    """
    Calculate similarity matrix between specified terms. Note that this
    assumes the embedding model has been previously loaded.
    """
    precision = config.get("precision", 3)
    records = []
    for i, term_a in enumerate(terms):
        # a term is always perfectly similar to itself
        records.append({"term1": term_a, "term2": term_a, "sim": 1.0})
        for term_b in terms[i + 1:]:
            if term_a in self.embed and term_b in self.embed:
                sim = round(self.embed.similarity(term_a, term_b), precision)
            else:
                # out-of-vocabulary pairs default to zero similarity
                sim = 0.0
            # record both orientations so the matrix is symmetric
            records.append({"term1": term_a, "term2": term_b, "sim": sim})
            records.append({"term1": term_b, "term2": term_a, "sim": sim})
    return pd.DataFrame(records)
def __init__(self, model_id, meta_file_path):
    """Load topic model metadata from the JSON file at meta_file_path.

    Raises an Exception if the file does not contain valid topic model
    metadata.
    """
    log.info("Loading model metadata from %s" % meta_file_path)
    # read the JSON, ensuring the file handle is always closed
    with open(meta_file_path, "r") as fin:
        data = json.load(fin)
    if not isinstance(data, dict):
        raise Exception("Invalid JSON format in metadata file")
    if "type" not in data:
        raise Exception("No type specified in metadata file")
    if data["type"] != "topic_model":
        raise Exception("Metadata does not describe a topic model")
    if "files" not in data:
        raise Exception("No file paths specified in metadata file")
    # add the metadata (this class behaves as a mapping)
    self["id"] = model_id
    for key in data:
        self[key] = data[key]
    # ensure we have the mandatory metadata
    if "corpus" not in self:
        self["corpus"] = "unknown"
    if "algorithm" not in self:
        self["algorithm"] = { "id": "unknown", "parameters": {} }
    for key in ["topics", "documents", "terms"]:
        if key not in self:
            self[key] = 0
    # other properties
    self.meta_file_path = Path(meta_file_path)
    # fix: derive the base directory from the Path object, so this also
    # works when meta_file_path is passed as a plain string
    self.dir_base = self.meta_file_path.parent
    # lazily-loaded model artifacts
    self.term_rankings = None
    self.partition = None
    self.term_associations = None
    self.document_associations = None
    # other settings
    self.top_terms = config.get("top_terms", 10)
    self.extended_top_terms = config.get("extended_top_terms", 20)
def __init__(self, webcore, all_model_metadata, show_navbar = True):
    """Create the validation page layout for the given topic models."""
    super(ValidationLayout, self).__init__(webcore)
    # page identity
    self.show_navbar = show_navbar
    self.page_title = "%s - Validation" % self.page_title
    self.page_suffix = "-validation"
    # topic validation measure implementation
    self.validator = TopicValidator()
    # current state: models plus the selected embedding and measure
    self.metadata = all_model_metadata
    self.current_embed_id = None
    self.current_measure_id = config.get("default_measure", "coherence")
    # memoized results, keyed by embedding ID
    self.validation_cache = {}
    self.term_distribution_cache = {}
def __init__(self, webcore, model_metadata, show_navbar = True):
    """Create the page layout for a single topic model."""
    super(TopicModelLayout, self).__init__(webcore)
    # page identity
    self.show_navbar = show_navbar
    self.page_title = "%s - Topic Model" % self.page_title
    self.page_suffix = "-topics"
    # current state: the model and the topic selected on each view
    self.metadata = model_metadata
    self.current_term_topic_index = 1
    self.current_document_topic_index = 1
    # number of top associations to display
    self.top_associations = config.get("num_associations", 20)
    # lazily-populated caches
    self.term_associations = None
    self.document_associations = None
    self.partition_df = None
def generate_matching_table( self ):
    """Generate a table aligning the topics of the two currently-selected
    models, using the current embedding to match topics by similarity.

    Returns an empty string when no embedding is selected or either model
    has no descriptors.
    """
    if self.current_embed_id is None:
        return ""
    embed = self.webcore.get_embedding(self.current_embed_id)
    if embed is None:
        return ""
    # descriptors for the two models being compared
    descriptors1 = self.all_metadata[self.current_metadata_indices[0]].get_descriptors()
    descriptors2 = self.all_metadata[self.current_metadata_indices[1]].get_descriptors()
    if descriptors1 is None or descriptors2 is None:
        return ""
    # perform the match: permutation[i] is the model-2 topic matched to model-1 topic i
    matcher = TopicMatcher(embed)
    permutation, similarities = matcher.match(descriptors1, descriptors2)
    # create the table
    k1, k2 = len(descriptors1), len(descriptors2)
    # widen the zero-padded topic labels when either model has 100+ topics
    num_fmt = "%02d" if max(k1,k2) < 100 else "%03d"
    rows = []
    # track which model-2 topics were consumed by a match
    matched_model2 = []
    for topic_index1 in range(k1):
        topic_index2 = permutation[topic_index1]
        matched_model2.append(topic_index2)
        ranking1 = descriptors1[topic_index1]
        row = { "Topic 1":num_fmt % (topic_index1+1) }
        row["Descriptor 1"] = ", ".join(ranking1[0:self.top_terms])
        if topic_index2 < k2:
            # a valid match: show the paired topic and its similarity score
            ranking2 = descriptors2[topic_index2]
            row["Topic 2"] = num_fmt % (topic_index2+1)
            row["Descriptor 2"] = ", ".join(ranking2[0:self.top_terms])
            row["Similarity"] = config.get("float_format","%.3f") % similarities[topic_index1]
        else:
            # no counterpart in model 2; leave the right-hand columns blank
            row["Topic 2"] = ""
            row["Descriptor 2"] = ""
            row["Similarity"] = ""
        rows.append( row )
    # append any model-2 topics that were never matched to a model-1 topic
    for topic_index2 in range(k2):
        if topic_index2 in matched_model2:
            continue
        ranking2 = descriptors2[topic_index2]
        row = { "Topic 2":num_fmt % (topic_index2+1) }
        row["Descriptor 2"] = ", ".join(ranking2[0:self.top_terms])
        row["Topic 1"] = ""
        row["Descriptor 1"] = ""
        row["Similarity"] = ""
        rows.append( row )
    df = pd.DataFrame(rows)
    # center the narrow numeric columns; descriptors stay left-aligned
    alignments = { "Topic 1":"center", "Topic 2":"center", "Similarity":"center" }
    return DataFrameTable( df, id="matching-table", alignments=alignments, striped=False, hover=False ).generate_layout()
def generate_measure_dropdown(self):
    """
    Utility function to generate a dropdown component which allows the
    user to choose between different topic evaluation measures.
    """
    measure_options = [
        {"label": "%s (%s)" % (measure_names[mid], measure_short_names[mid]), "value": mid}
        for mid in measure_names
    ]
    # fall back to the first listed measure when none is configured
    default_measure = config.get("default_measure", measure_options[0]["value"])
    return dbc.Select(id='measure-dropdown', options=measure_options, value=default_measure)
def get_validation_df(self, all_meta, embed):
    """Score every topic model in all_meta against the given embedding.

    Returns a Data Frame with one row per model, or None when no
    embedding is available.
    """
    if embed is None:
        return None
    measures = get_measures(measure_names.keys(), embed)
    precision = config.get("precision", 3)
    records = []
    for meta in all_meta:
        descriptors = meta.get_descriptors()
        # skip models that have no topic descriptors
        if descriptors is None:
            continue
        record = { "Name": meta["id"], "Corpus": meta["corpus"], "Topics": len(descriptors) }
        for measure_id in measures:
            score = measures[measure_id].evaluate_model(descriptors)
            # TODO: move rounding elsewhere?
            record[measure_id] = round(score, precision)
        records.append(record)
    return pd.DataFrame(records)
def generate_neighbor_card(self):
    """Build the card with the neighbor-term query input and results table."""
    sample_query = config.get("query_sample", "")
    query_input = dbc.InputGroup([
        dbc.InputGroupAddon("Query Terms", addon_type="prepend"),
        dbc.Input(id="query-embed", value=sample_query, placeholder="Enter a list of comma-separated terms...", type="text", debounce=True, className="custom-text"),
    ])
    body = dbc.CardBody([
        html.Div(self.generate_neighbor_card_text(), className="card-text"),
        query_input,
        html.Div(self.generate_neighbor_table(sample_query), id='content_neighbor_table'),
    ])
    header = dbc.CardHeader("Embedding Term Neighbors", className="card-header")
    return dbc.Card([header, body], )
def get_validation_df(self, meta, embed):
    """
    Get a Data Frame containing validation scores for the individual
    topics in the specified topic model.
    """
    if embed is None:
        return pd.DataFrame([])
    descriptors = meta.get_descriptors()
    if descriptors is None:
        return pd.DataFrame([])
    # zero-padded topic labels, widened when there are 100+ topics
    num_fmt = "%03d" if len(descriptors) >= 100 else "%02d"
    rows = [
        { "Topic": num_fmt % (i + 1), "Descriptor": ", ".join(descriptors[i]) }
        for i in range(meta["k"])
    ]
    precision = config.get("precision", 3)
    measures = get_measures(measure_names.keys(), embed)
    for measure_id in measures:
        # each measure yields one score per topic, in topic order
        scores = measures[measure_id].evaluate_topics(descriptors)
        for i, score in enumerate(scores):
            # TODO: move rounding elsewhere?
            rows[i][measure_id] = round(score, precision)
    return pd.DataFrame(rows)
def generate_embed_heatmap(self, query_string):
    """Render a heatmap of pairwise similarities between the query terms
    and the nearest neighbors of each query term."""
    # parse the query string into individual terms
    query = self.__parse_query_string(query_string)
    if len(query) == 0:
        return ""
    # lazily load the embedding on first use
    if self.embed is None:
        self.embed = self.webcore.get_embedding(self.metadata["id"])
        if self.embed is None:
            return ""
    # gather each query term plus its neighbors, avoiding duplicate neighbors
    num_neighbors = config.get("num_neighbors", 10)
    all_terms = []
    for query_term in query:
        all_terms.append(query_term)
        for term in self.embed.get_neighbors(query_term, num_neighbors=num_neighbors):
            if term not in all_terms:
                all_terms.append(term)
    df = self.__calculate_similarity_df(all_terms)
    return dcc.Graph(
        id='chart_topicheatmap',
        figure={
            'data': [
                {
                    'x': df["term1"],
                    'y': df["term2"],
                    'z': df["sim"],
                    'type': 'heatmap',
                    'hoverlabel': { 'bgcolor': 'rgb(250, 246, 208)' },
                },
            ],
            'layout': {
                'margin': { "t": 2 },
                'height': 900,
                # reversed so term order reads top-to-bottom
                "yaxis": { "autorange": 'reversed' },
            }
        })
def __find_metadata(self):
    """Scan the core directory for metadata files describing embeddings
    and topic models.

    Populates self.embedding_meta and self.model_meta, keyed by IDs
    derived from each file's relative path. Files that cannot be read or
    parsed are skipped with a warning.
    """
    self.embedding_meta = {}
    self.model_meta = {}
    extension = config.get("file_extension", ".meta")
    meta_file_paths = self.dir_core.glob('**/*' + extension)
    for meta_file_path in meta_file_paths:
        try:
            with open(meta_file_path, "r") as fin:
                data = json.load(fin)
            if not isinstance(data, dict):
                continue
            # create the ID as a relative path minus the extension
            meta_id = filepath_to_metadata_id( meta_file_path, self.dir_core )
            # fix: use .get() so a file without a "type" key is reported as
            # an unknown type rather than raising KeyError and being
            # mis-logged as a parse failure
            meta_type = data.get("type")
            if meta_type == "embedding":
                self.embedding_meta[meta_id] = EmbeddingMeta(meta_id, meta_file_path)
            elif meta_type == "topic_model":
                self.model_meta[meta_id] = TopicModelMeta(meta_id, meta_file_path)
            else:
                log.info("Unknown metadata type %s in file %s" % (meta_type, meta_file_path))
        except Exception as e:
            log.warning("Skipping file: %s" % meta_file_path)
            log.warning(e)
    log.info("Found %d embeddings, %d topic models" % (len(self.embedding_meta), len(self.model_meta)))
def generate_topiclevel_heatmap(self):
    """ Generate a heatmap depicting pairwise topic-topic similarities. """
    if self.current_embed_id is None:
        return ""
    descriptors = self.metadata.get_descriptors()
    if descriptors is None:
        return ""
    # already cached these results?
    if self.current_embed_id in self.topiclevel_cache:
        df = self.topiclevel_cache[self.current_embed_id]
        log.info("Using cached similarites for embedding %s" % self.current_embed_id)
    else:
        # get the word embedding
        embed = self.webcore.get_embedding(self.current_embed_id)
        if embed is None:
            return ""
        log.info("Computing similarities for topic model using %s ..." % self.current_embed_id)
        # one row per topic pair, with columns topic1/topic2/sim
        df = self.validator.get_topic_pair_similarity_df( self.metadata, embed)
        if df is None:
            return ""
        # round it before caching
        df = df.round(config.get("precision", 3))
        self.topiclevel_cache[self.current_embed_id] = df
    # generate the chart: hover text shows both topic descriptors for each cell.
    # Topic labels are of the form "Topic N" (1-based), so strip the prefix
    # to recover the 0-based descriptor index.
    hovertext = []
    for i, row in df.iterrows():
        topic_num1 = int(row["topic1"].replace("Topic ", "")) - 1
        topic_num2 = int(row["topic2"].replace("Topic ", "")) - 1
        s = "<b>%s</b>: %s<br><b>%s</b>: %s" % (row["topic1"], ", ".join( descriptors[topic_num1]), row["topic2"], ", ".join( descriptors[topic_num2]))
        hovertext.append(s)
    return dcc.Graph(
        id='chart_topicheatmap',
        figure={
            'data': [
                {
                    'x': df["topic1"],
                    'y': df["topic2"],
                    'z': df["sim"],
                    'hovertext': hovertext,
                    'type': 'heatmap',
                    'hoverlabel': { 'bgcolor': 'rgb(250, 246, 208)' },
                    'hovertemplate': '%{hovertext}<br>Similarity: %{z}<extra></extra>',
                },
            ],
            'layout': {
                'margin': { "t": 2 },
                'height': 600,
                # reversed so topics read top-to-bottom down the y-axis
                "yaxis": { "autorange": 'reversed' },
            }
        })
def generate_termlevel_heatmap(self):
    """ Generate a heatmap showing the similarities between the pairs of
    terms which appear in the descriptor of an individual topic. """
    if self.current_embed_id is None:
        return ""
    descriptors = self.metadata.get_descriptors()
    if descriptors is None:
        return ""
    # already cached these results?
    if self.current_embed_id in self.termlevel_cache:
        df = self.termlevel_cache[self.current_embed_id]
        log.info("Using cached term similarites for embedding %s" % self.current_embed_id)
    else:
        # get the word embedding
        embed = self.webcore.get_embedding(self.current_embed_id)
        if embed is None:
            return ""
        log.info( "Computing term similarities for topic model using %s ..." % self.current_embed_id)
        # one row per term pair, with columns term1/term2/sim
        df = self.validator.get_term_pair_similarity_df( self.metadata, embed)
        if df is None:
            return ""
        # round it before caching
        df = df.round(config.get("precision", 3))
        self.termlevel_cache[self.current_embed_id] = df
    # now get the relevant terms for this topic and filter the Data Frame
    # (current_topic_index is 1-based, descriptors are 0-based)
    current_descriptor = descriptors[self.current_topic_index - 1]
    current_descriptor_set = set(current_descriptor)
    xvalues, yvalues, zvalues, hovertext = [], [], [], []
    # keep only the pairs where both terms belong to the current descriptor
    # TODO: make this more effecient
    for i, row in df.iterrows():
        if row["term1"] in current_descriptor_set and row[
                "term2"] in current_descriptor_set:
            xvalues.append(row["term1"])
            yvalues.append(row["term2"])
            zvalues.append(row["sim"])
            hovertext.append("<b>(%s, %s)</b>" % (row["term1"], row["term2"]))
    # generate the chart
    title = "Topic %02d: %s" % (self.current_topic_index,
                                ", ".join(current_descriptor))
    return dcc.Graph(
        id='chart_topicheatmap',
        figure={
            'data': [
                {
                    'x': xvalues,
                    'y': yvalues,
                    'z': zvalues,
                    'hovertext': hovertext,
                    'type': 'heatmap',
                    'hoverlabel': { 'bgcolor': 'rgb(250, 246, 208)' },
                    'hovertemplate': '%{hovertext}<br>Similarity: %{z}<extra></extra>',
                },
            ],
            'layout': {
                'title': { 'text': title, 'font': { "size": 15 } },
                'margin': { "t": 40, "l": 200, "r": 200 },
                'height': 600,
                'xaxis': { 'tickfont': { "size": 14 } },
                # reversed so terms read top-to-bottom down the y-axis
                "yaxis": { "autorange": 'reversed', 'tickfont': { "size": 14 } },
            }
        })
def generate_heatmap_card_text(self):
    """Produce the Markdown description shown above the similarity heatmap."""
    parts = [
        "The heatmap visualization below shows the similarities between all query terms entered above,",
        " and the %d neighbors of those terms." % config.get("num_neighbors", 10),
        " The similarities of the terms are based on the word embedding *%s* selected above." % self.metadata["id"],
    ]
    return dcc.Markdown("".join(parts))
def generate_topiclevel_chart( self ):
    """Generate a horizontal bar chart of per-topic silhouette scores for
    the current topic model, using the currently-selected embedding."""
    if self.current_embed_id is None:
        return ""
    # already cached these results?
    if self.current_embed_id in self.topiclevel_cache:
        df_sil = self.topiclevel_cache[self.current_embed_id]
        log.info("Using cached silhouette scores for embedding %s" % self.current_embed_id )
    else:
        # get the word embedding
        embed = self.webcore.get_embedding(self.current_embed_id)
        if embed is None:
            return ""
        log.info("Applying silhouette analysis to topic model using %s ..." % self.current_embed_id )
        # one row per topic, with Score/Descriptor/Number columns
        df_sil = self.validator.get_topiclevel_silhouette_df( self.metadata, embed )
        if df_sil is None:
            return ""
        # round it before caching
        df_sil = df_sil.round( config.get("precision", 3) )
        self.topiclevel_cache[self.current_embed_id] = df_sil
    # sort the results in reverse order, so the highest score plots at the top
    df_sil = df_sil.sort_values(by="Score", ascending=True)
    colors = self.get_colors( self.metadata["k"] )
    xvalues, yvalues, hovertext, s_colors = [], [], [], []
    for label, row in df_sil.iterrows():
        xvalues.append( row["Score"] )
        # trailing space pads the labels away from the axis
        yvalues.append( label + " " )
        hovertext.append( row["Descriptor"] )
        # topic "Number" is 1-based; colors are indexed from 0
        s_colors.append( self.format_color_string(colors[row["Number"]-1]) )
    # choose a sensible range for the x-axis: zoom in to [-0.5, 0.5]
    # unless some score falls outside that window
    if ( min(df_sil["Score"]) <= -0.5) or (max(df_sil["Score"]) >= 0.5):
        min_value, max_value = -1, 1
    else:
        min_value, max_value = -0.5, 0.5
    # generate the chart, scaling its height with the number of topics
    if len(xvalues) <= 6:
        chart_height = 400
    elif len(xvalues) <= 10:
        chart_height = 500
    else:
        chart_height = 600
    return dcc.Graph(
        id='chart_topicsil',
        figure={
            'data': [
                {
                    'x': xvalues,
                    'y': yvalues,
                    'hovertext' : hovertext,
                    'type': 'bar',
                    'orientation' : 'h',
                    'marker' : { 'color' : s_colors, 'opacity': 0.4 },
                    'hovertemplate': '<b>%{y}</b>: %{x}<br>%{hovertext}<extra></extra>',
                    'hoverlabel' : { 'bgcolor' : 'rgb(250, 246, 208)' }
                },
            ],
            'layout': {
                'height' : chart_height,
                'margin': { "t" : 20 },
                'yaxis' : { 'tickfont' : { "size" : 14 } },
                'xaxis' : { 'title' : "Topic Silhouette Score", 'tickfont' : { "size" : 13 }, 'titlefont' : { "size" : 15 }, 'range': [min_value, max_value] }
            }
        })