def view_from_jupyter(
        self,
        document_id: str,
        # String annotation: the former `str or Callable` evaluated to plain `str`.
        metric: 'Union[str, Callable]' = 'jensenshannon',
        num_top_similar: int = 5,
        num_digits: int = 3,
        keep_similar_by_words: bool = True,
        display_output: bool = True,
        give_html: bool = False,):
    """
    Method for viewing documents similar to requested one
    from jupyter notebook. Provides document titles and snippets of
    first few sentences.

    The requested document itself is rendered first, with distance 0,
    followed by the documents returned by `self.view`.

    Parameters
    ----------
    document_id
        ID of the document in `dataset`
    metric
        Distance measure which is to be used to measure how documents
        differ from each other.
        If str -- should be one of 'jensenshannon', 'euclidean', 'cosine',
        'correlation' -- as in scipy.spatial.distance.cdist
        If callable -- should map two vectors to numeric value
    num_top_similar
        How many top similar documents' IDs to show
    num_digits
        Currently not used by this method.
        NOTE(review): presumably intended to control rounding of the
        displayed distances -- confirm and forward it to the HTML helper
        if that helper supports it.
    keep_similar_by_words
        Whether or not to keep in the output those documents
        that are similar to the given one by their constituent words
        and words' frequencies
    display_output
        whether to display the generated html at the end of the method run
    give_html
        whether to return the html string generated by the method

    Returns
    -------
    topic_html
        html string of the generated output if `give_html` is True,
        otherwise None
    """
    from IPython.display import display_html
    from topicnet.cooking_machine.pretty_output import make_notebook_pretty

    make_notebook_pretty()

    search_ids, search_distances = self.view(
        document_id=document_id,
        metric=metric,
        num_top_similar=num_top_similar,
        keep_similar_by_words=keep_similar_by_words,
    )

    # The requested document leads the listing, at zero distance from itself.
    search_ids = [document_id] + search_ids
    search_distances = [0] + search_distances

    # Join once instead of growing the string with `+=` inside the loop.
    topic_html = ''.join(
        prepare_doc_html_with_similarity(
            self._dataset.get_source_document(doc_id),
            distance,
        )
        for doc_id, distance in zip(search_ids, search_distances)
    )

    if display_output:
        display_html(topic_html, raw=True)

    if give_html:
        return topic_html

    return None
def view_from_jupyter(
        self,
        current_num_top_doc: int = None,
        topic_names: list = None,
        display_output: bool = True,
        give_html: bool = False,
):
    """
    TopDocumentsViewer method recommended for use from jupyter notebooks.
    Renders the source texts of the documents selected by `self.view`,
    grouped under one headline per topic.

    Parameters
    ----------
    current_num_top_doc
        number of top documents to provide for each cluster;
        passed on to `self.view`
         (Default value = None)
    topic_names
        list of topic names to view; passed on to `self.view`
    display_output
        whether to display the generated html at the end of the method run
        (the per-topic pieces are joined with line breaks)
    give_html
        whether to return the html generated by the method

    Returns
    -------
    html_output
        list of html strings, one per topic, if `give_html` is True,
        otherwise None
    """
    from IPython.display import display_html
    from topicnet.cooking_machine.pretty_output import make_notebook_pretty

    make_notebook_pretty()

    docs_by_topic = self.view(current_num_top_doc, topic_names=topic_names)

    html_output = []

    for topic_name, topic_docs_dict in docs_by_topic.items():
        headline = f'<h1><b>Topic name:</b> {topic_name}</h1>'
        # One rendered snippet per document, in the order the keys come out.
        rendered_docs = [
            prepare_html_string(self._dataset.get_source_document(doc_id))
            for doc_id in topic_docs_dict.keys()
        ]
        html_output.append(headline + ''.join(rendered_docs))

    if display_output:
        display_html('<br />'.join(html_output), raw=True)

    if give_html:
        return html_output
def view_from_jupyter(self, display_output: bool = True, give_html: bool = False, **kwargs):
    """
    TopicMapViewer method recommended for use from jupyter notebooks
    returns closest pairs of models topics and visualizes their top tokens.

    The topic orderings for both models come from `self.view()`; the top
    tokens of each model are rendered by a `TopTokensViewer` and placed
    side by side in a two-column html table, one row per pair.

    Parameters
    ----------
    display_output
        whether to display the generated html at the end of the method run
    give_html
        whether to return the html string generated by the method
    **kwargs
        optional arguments forwarded to the `TopTokensViewer` constructor.
        The key `digits` (default 5) is taken out of `kwargs` and passed to
        `TopTokensViewer.view_from_jupyter` instead.

    Returns
    -------
    out_html
        html string of the output if `give_html` is True, otherwise None
    """
    from IPython.display import display_html
    from topicnet.cooking_machine.pretty_output import make_notebook_pretty

    # `digits` belongs to the rendering call, not to the viewer constructor.
    digits = kwargs.pop('digits', 5)

    make_notebook_pretty()

    first_model_order, second_model_order = self.view()

    token_view = TopTokensViewer(model=self.model, **kwargs).view_from_jupyter(
        topic_names=first_model_order,
        digits=digits,
        display_output=False,
        give_html=True,
    )
    second_token_view = TopTokensViewer(model=self.second_model, **kwargs).view_from_jupyter(
        topic_names=second_model_order,
        digits=digits,
        display_output=False,
        give_html=True,
    )

    model_name = self.model.model_id
    second_model_name = self.second_model.model_id

    header_row = (
        f'<tr><td> First model name: {model_name}</td>'
        f'<td> Second model name: {second_model_name}</td></tr>'
    )
    body_rows = ''.join(
        f'<tr><td>{t1}</td><td>{t2}</td></tr>'
        for t1, t2 in zip(token_view, second_token_view)
    )

    # Assemble the table directly. The previous implementation chained two
    # `str.format` calls, so a `{` or `}` inside a model id was re-parsed as
    # a replacement field by the second call and broke the output.
    out_html = (
        '<table style=display:inline; cellpadding="5";><tbody>'
        + header_row
        + body_rows
        + '</tbody></table>'
    )

    if display_output:
        display_html(out_html, raw=True)

    if give_html:
        return out_html

    return None
def view_from_jupyter(
        self,
        topic_names: Union[str, List[str]] = None,
        digits: int = 5,
        horizontally_stack: bool = True,
        display_output: bool = True,
        give_html: bool = False,):
    """
    TopTokensViewer method recommended for use from jupyter notebooks.
    Renders every requested topic with `self.to_html`.

    Parameters
    ----------
    topic_names
        topics requested for viewing: a single name, a list of names,
        or None to use all of `self._model.topic_names`
    digits
        Number of digits to round each probability to
    horizontally_stack
        if True, then tokens for each modality will be stacked horizontally
        (instead of being a single long multi-line DataFrame)
    display_output
        whether to display each topic's html as it is generated
    give_html
        whether to return the html strings generated by the method

    Returns
    -------
    topic_html_strings
        list of strings in HTML format, one per topic,
        if `give_html` is True, otherwise None

    Raises
    ------
    TypeError
        if `topic_names` is neither a list, a str, nor None

    Examples
    --------
    >>> # model training here
    >>> # ...
    >>> viewer = TopTokensViewer(model)
    >>> viewer.view_from_jupyter()
    >>> # or, to get the html without displaying it
    >>> information = viewer.view_from_jupyter(display_output=False, give_html=True)
    """
    # Normalise and validate `topic_names` first, so that an invalid call
    # fails before any notebook styling is applied.
    if topic_names is None:
        topic_names = self._model.topic_names
    elif isinstance(topic_names, str):
        topic_names = [topic_names]
    elif not isinstance(topic_names, list):
        raise TypeError(f'Invalid type `topic_names` type: "{type(topic_names)}"')

    # Public IPython entry point, the same one the sibling viewers import from.
    from IPython.display import display_html
    from topicnet.cooking_machine.pretty_output import make_notebook_pretty

    make_notebook_pretty()

    topic_html_strings = []

    for topic in topic_names:
        topic_html = self.to_html(
            topic_names=topic,
            digits=digits,
            horizontally_stack=horizontally_stack,
        )

        if display_output:
            display_html(topic_html, raw=True)

        topic_html_strings.append(topic_html)

    if give_html:
        return topic_html_strings

    return None