Ejemplo n.º 1
0
    def to_semantic_network(self,
                            nodes='words',
                            normalize='lemma',
                            edge_weighting='default',
                            window_width=10):
        """
        Build a semantic network from this ``Doc``, using either its words or
        its sentences as nodes and connecting them with weighted edges.

        Args:
            nodes ({'words', 'sents'}): which component of the doc becomes the
                nodes of the network
            normalize (str or callable): how node text is normalized: 'lemma'
                lemmatizes terms; 'lower' lowercases them; a false-y value
                keeps terms as they appear in the doc; a callable must accept
                a ``spacy.Token`` (when ``nodes`` is 'words') or a
                ``spacy.Span`` (when ``nodes`` is 'sents') and return a str,
                e.g. :func:`textacy.spacy_utils.normalized_str()`
            edge_weighting (str): weighting scheme for edges between nodes;
                for ``nodes == 'words'`` the options are {'cooc_freq', 'binary'},
                for ``nodes == 'sents'`` they are {'cosine', 'jaccard'};
                'default' resolves to 'cooc_freq' for words and 'cosine' for
                sentences
            window_width (int): size of the sliding window over terms used to
                decide which terms co-occur; only passed along when ``nodes``
                is 'words'

        Returns:
            :class:`networkx.Graph <networkx.Graph>`: graph whose nodes are the
                doc's terms or sentences and whose edges are the relationships
                between them

        Raises:
            ValueError: if ``nodes`` is neither 'words' nor 'sents'

        See Also:
            :func:`terms_to_semantic_network() <textacy.network.terms_to_semantic_network>`
            :func:`sents_to_semantic_network() <textacy.network.sents_to_semantic_network>`
        """
        if nodes == 'words':
            # 'default' means co-occurrence frequency when nodes are words
            weighting = (
                'cooc_freq' if edge_weighting == 'default' else edge_weighting)
            terms = list(textacy.extract.words(self))
            return network.terms_to_semantic_network(
                terms,
                normalize=normalize,
                window_width=window_width,
                edge_weighting=weighting)

        if nodes == 'sents':
            # 'default' means cosine similarity when nodes are sentences
            weighting = (
                'cosine' if edge_weighting == 'default' else edge_weighting)
            sentences = list(self.sents)
            return network.sents_to_semantic_network(
                sentences,
                normalize=normalize,
                edge_weighting=weighting)

        # any other value of ``nodes`` is unsupported
        valid_nodes = {'words', 'sents'}
        raise ValueError(
            'nodes "{}" not valid; must be in {}'.format(nodes, valid_nodes))
Ejemplo n.º 2
0
    def to_semantic_network(self, nodes='words',
                            edge_weighting='default', window_width=10):
        """
        Build a semantic network from this ``Doc``, using either its words or
        its sentences as nodes and connecting them with weighted edges.

        Args:
            nodes ({'words', 'sents'}): which component of the doc becomes the
                nodes of the network
            edge_weighting (str): weighting scheme for edges between nodes;
                for ``nodes == 'words'`` the options are {'cooc_freq', 'binary'},
                for ``nodes == 'sents'`` they are {'cosine', 'jaccard'};
                'default' resolves to 'cooc_freq' for words and 'cosine' for
                sentences
            window_width (int): size of the sliding window over terms used to
                decide which terms co-occur; only passed along when ``nodes``
                is 'words'

        Returns:
            :class:`networkx.Graph <networkx.Graph>`: graph whose nodes are the
                doc's terms or sentences and whose edges are the relationships
                between them

        Raises:
            ValueError: if ``nodes`` is neither 'words' nor 'sents'

        See Also:
            :func:`terms_to_semantic_network() <textacy.network.terms_to_semantic_network>`
            :func:`sents_to_semantic_network() <textacy.network.sents_to_semantic_network>`
        """
        if nodes == 'words':
            # 'default' means co-occurrence frequency when nodes are words
            weighting = (
                'cooc_freq' if edge_weighting == 'default' else edge_weighting)
            terms = list(textacy.extract.words(self))
            return network.terms_to_semantic_network(
                terms,
                window_width=window_width,
                edge_weighting=weighting)

        if nodes == 'sents':
            # 'default' means cosine similarity when nodes are sentences
            weighting = (
                'cosine' if edge_weighting == 'default' else edge_weighting)
            sentences = list(self.sents)
            return network.sents_to_semantic_network(
                sentences, edge_weighting=weighting)

        # any other value of ``nodes`` is unsupported
        valid_nodes = {'words', 'sents'}
        raise ValueError(
            'nodes "{}" not valid; must be in {}'.format(nodes, valid_nodes))