def view_from_jupyter(
            self,
            document_id: str,
            metric: str or Callable = 'jensenshannon',
            num_top_similar: int = 5,
            num_digits: int = 3,
            keep_similar_by_words: bool = True,
            display_output: bool = True,
            give_html: bool = False,):
        """
        Render a document together with its most similar documents
        for a jupyter notebook.

        The requested document is always listed first (with distance 0),
        followed by the documents found by `self.view`. Every entry is
        rendered with `prepare_doc_html_with_similarity`.

        Parameters
        ----------
        document_id
            ID of the document in `dataset`
        metric
            Distance measure used to compare documents; forwarded to `self.view`.
            If str -- should be one of 'jensenshannon', 'euclidean', 'cosine', 'correlation' --
                as in scipy.spatial.distance.cdist
            If callable -- should map two vectors to numeric value
        num_top_similar
            How many top similar documents to show
        num_digits
            NOTE(review): accepted for interface compatibility, but this method
            does not read it -- distances are passed to the HTML helper as is.
        keep_similar_by_words
            Whether or not to keep in the output those documents
            that are similar to the given one by their constituent words and words' frequencies
        display_output
            if True, the generated html is displayed at the end of the run
        give_html
            if True, the generated html string is returned

        Returns
        -------
        topic_html
            html string of the generated output if `give_html` is True,
            otherwise None
        """
        from IPython.display import display_html
        from topicnet.cooking_machine.pretty_output import make_notebook_pretty

        make_notebook_pretty()
        neighbour_ids, neighbour_distances = self.view(
            document_id=document_id,
            metric=metric,
            num_top_similar=num_top_similar,
            keep_similar_by_words=keep_similar_by_words,
        )

        # The query document heads the listing, at zero distance from itself.
        ordered_ids = [document_id] + neighbour_ids
        ordered_distances = [0] + neighbour_distances

        topic_html = ''.join(
            prepare_doc_html_with_similarity(
                self._dataset.get_source_document(doc_id),
                distance,
            )
            for doc_id, distance in zip(ordered_ids, ordered_distances)
        )

        if display_output:
            display_html(topic_html, raw=True)

        return topic_html if give_html else None
Exemple #2
0
    def view_from_jupyter(
        self,
        current_num_top_doc: int = None,
        topic_names: list = None,
        display_output: bool = True,
        give_html: bool = False,
    ):
        """
        TopDocumentsViewer method recommended for use
        from jupyter notebooks.

        For every topic returned by `self.view`, builds one html fragment:
        a headline with the topic name followed by the html of each of the
        topic's documents (as produced by `prepare_html_string`).

        Parameters
        ----------
        current_num_top_doc
            number of top documents to provide for
            each cluster (Default value = None)
        topic_names
            list of topic names to view
        display_output
            if True, the fragments are displayed (joined with `<br />`)
            at the end of the run
        give_html
            if True, the generated html fragments are returned

        Returns
        -------
        html_output
            list of html strings, one per topic, if `give_html` is True,
            otherwise None
        """
        from IPython.display import display_html
        from topicnet.cooking_machine.pretty_output import make_notebook_pretty

        make_notebook_pretty()

        docs_by_topic = self.view(current_num_top_doc, topic_names=topic_names)

        html_output = []
        for topic_name, topic_docs_dict in docs_by_topic.items():
            # The headline goes first, then one html chunk per document of the topic.
            fragments = [f'<h1><b>Topic name:</b> {topic_name}</h1>']
            fragments.extend(
                prepare_html_string(self._dataset.get_source_document(doc_id))
                for doc_id in topic_docs_dict.keys()
            )
            html_output.append(''.join(fragments))

        if display_output:
            display_html('<br />'.join(html_output), raw=True)

        return html_output if give_html else None
Exemple #3
0
    def view_from_jupyter(self,
                          display_output: bool = True,
                          give_html: bool = False,
                          **kwargs):
        """
        TopicMapViewer method recommended for use
        from jupyter notebooks.

        Takes the two topic orderings returned by `self.view`, renders the
        top tokens of each topic with `TopTokensViewer` (one viewer per model)
        and puts the results side by side in a two-column html table whose
        first row holds the ids of both models.

        Parameters
        ----------
        display_output
            if True, the generated html is displayed at the end of the run
        give_html
            if True, the generated html string is returned

        Returns
        -------
        out_html
            html string of the output if `give_html` is True, otherwise None

        Other Parameters
        ----------------
        **kwargs
            optional `TopTokensViewer` constructor arguments, passed to the
            viewers of both models. The key 'digits' (default 5) is taken out
            of *kwargs* and passed to `TopTokensViewer.view_from_jupyter` instead.
        """
        from IPython.display import display_html
        from topicnet.cooking_machine.pretty_output import make_notebook_pretty

        # 'digits' is an argument of view_from_jupyter, not of the constructor.
        digits = kwargs.pop('digits', 5)

        make_notebook_pretty()
        first_model_order, second_model_order = self.view()

        first_token_view = TopTokensViewer(
            model=self.model,
            **kwargs,
        ).view_from_jupyter(
            topic_names=first_model_order,
            digits=digits,
            display_output=False,
            give_html=True,
        )
        second_token_view = TopTokensViewer(
            model=self.second_model,
            **kwargs,
        ).view_from_jupyter(
            topic_names=second_model_order,
            digits=digits,
            display_output=False,
            give_html=True,
        )

        model_name = self.model.model_id
        second_model_name = self.second_model.model_id

        header_row = (
            f'<tr><td> First model name: {model_name}</td>'
            f'<td> Second model name: {second_model_name}</td></tr>'
        )
        # zip pairs the i-th entries of both views; if the views differ in
        # length, the extra entries of the longer one are not shown.
        topic_rows = ''.join(
            f'<tr><td>{first_cell}</td><td>{second_cell}</td></tr>'
            for first_cell, second_cell in zip(first_token_view, second_token_view)
        )

        # The table is assembled by plain concatenation rather than by chained
        # str.format calls: formatting a template that already contains the
        # model ids would fail for ids with '{' or '}' in them.
        out_html = (
            '<table style=display:inline; cellpadding="5";><tbody>'
            + header_row
            + topic_rows
            + '</tbody></table>'
        )

        if display_output:
            display_html(out_html, raw=True)
        if give_html:
            return out_html
        return None
Exemple #4
0
    def view_from_jupyter(
            self,
            topic_names: Union[str, List[str]] = None,
            digits: int = 5,
            horizontally_stack: bool = True,
            display_output: bool = True,
            give_html: bool = False,
    ):
        """
        TopTokensViewer method recommended for use
        from jupyter notebooks.

        Renders every requested topic with `self.to_html` and, optionally,
        displays each topic's html right after it has been generated.

        Parameters
        ----------
        topic_names
            topics requested for viewing: a single name, a list of names,
            or None to use all topics of the model (`self._model.topic_names`)
        digits
            Number of digits to round each probability to
        horizontally_stack
            if True, then tokens for each modality will be stacked horizontally
            (instead of being a single long multi-line DataFrame)
        display_output
            if True, the html of every topic is displayed
        give_html
            if True, the generated html strings are returned

        Returns
        -------
        topic_html_strings
            list of strings in HTML format, one per topic, if `give_html`
            is True, otherwise None

        Raises
        ------
        TypeError
            if `topic_names` is neither None, nor a str, nor a list

        Examples
        --------
        >>> # model training here
        >>> # ...
        >>> viewer = TopTokensViewer(model)
        >>> viewer.view_from_jupyter()
        >>> # or, to get the html strings without displaying them
        >>> information = viewer.view_from_jupyter(display_output=False, give_html=True)
        """
        from IPython.core.display import display_html
        from topicnet.cooking_machine.pretty_output import make_notebook_pretty

        make_notebook_pretty()

        # Normalise `topic_names` to a list of topic names.
        if topic_names is None:
            topic_names = self._model.topic_names
        elif isinstance(topic_names, str):
            topic_names = [topic_names]
        elif not isinstance(topic_names, list):
            raise TypeError(f'Invalid type `topic_names` type: "{type(topic_names)}"')

        topic_html_strings = []

        for topic in topic_names:
            rendered_topic = self.to_html(
                topic_names=topic,
                digits=digits,
                horizontally_stack=horizontally_stack,
            )
            # Each topic is shown as soon as it is ready, not all at the end.
            if display_output:
                display_html(rendered_topic, raw=True)
            topic_html_strings.append(rendered_topic)

        return topic_html_strings if give_html else None