Example #1
# Imports assumed from the textplot package layout (Text, Matrix, Skimmer),
# plus click for console output.
import click

from textplot.text import Text
from textplot.matrix import Matrix
from textplot.graphs import Skimmer


def build_graph(path, term_depth=1000, skim_depth=10,
                d_weights=False, **kwargs):

    """
    Tokenize a text, index a term matrix, and build out a graph.

    Args:
        path (str): The file path.
        term_depth (int): Consider the N most frequent terms.
        skim_depth (int): Connect each word to the N closest siblings.
        d_weights (bool): If true, give "close" nodes low weights.

    Returns:
        Skimmer: The indexed graph.
    """

    # Tokenize text.
    click.echo('\nTokenizing text...')
    t = Text.from_file(path)
    click.echo('Extracted %d tokens' % len(t.tokens))

    m = Matrix()

    # Index the term matrix.
    click.echo('\nIndexing terms:')
    m.index(t, t.most_frequent_terms(term_depth), **kwargs)

    g = Skimmer()

    # Construct the network.
    click.echo('\nGenerating graph:')
    g.build(t, m, skim_depth, d_weights)

    return g
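A minimal usage sketch for this helper (the file path is hypothetical, and
write_gml() is assumed from textplot's Graph base class):

g = build_graph('corpus/war-and-peace.txt', term_depth=500, skim_depth=5)

# Export the graph for downstream tools such as Gephi.
g.write_gml('war-and-peace.gml')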
Example #2
    def build_graph(self, term_depth=1000, skim_depth=10,
                    terms=None, **kwargs):

        """
        Construct a term graph.

        Args:
            term_depth (int): Consider the N most frequent terms.
            skim_depth (int): Connect each word to the N closest siblings.
            terms (list): Use a custom set of terms.

        Returns:
            textplot.graphs.Skimmer
        """

        # By default, use N most-frequent terms.
        terms = terms or self.most_frequent_terms(term_depth)

        # Index the term matrix.
        m = Matrix()
        m.index(self, terms, **kwargs)

        # Construct the network.
        g = Skimmer()
        g.build(self, m, skim_depth)

        return g
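Since this variant hangs the builder off the Text class itself, a call might
look like this (a sketch; the file path and term list are illustrative):

t = Text.from_file('corpus/essays.txt')

# Restrict the graph to a hand-picked vocabulary instead of the
# N most frequent terms.
g = t.build_graph(skim_depth=5, terms=['war', 'peace', 'love'])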
Example #3
def frequent(path,
             term_depth=500,
             skim_depth=10,
             d_weights=False,
             stopwordfile=None,
             postags=None,
             disambiguate=False,
             **kwargs):
    """
    Use most frequent terms.
    """

    t = Text.from_file(path,
                       stopwordfile=stopwordfile,
                       postags=postags,
                       disambiguate=disambiguate)
    m = Matrix(t)

    print('Indexing terms:')
    m.index(t.most_frequent_terms(term_depth), **kwargs)

    g = Skimmer()

    print('Generating graph:')
    g.build(m, skim_depth, d_weights)

    return g
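This fork adds preprocessing knobs to Text.from_file(); a call might look
like this (a sketch; the paths and POS tag set are hypothetical):

g = frequent('corpus/novel.txt',
             term_depth=300,
             stopwordfile='stopwords.txt',
             postags=['NN', 'NE'],
             disambiguate=True)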
Example #4
def test_missing_key():

    """
    If an unindexed key pair is passed, return None.
    """

    m = Matrix()
    m.set_pair('a', 'b', 1)

    assert m.get_pair('a', 'c') is None
Example #5
def test_set_pair():

    """
    set_pair() should set the value under an order-independent key.
    """

    m = Matrix()
    m.set_pair('a', 'b', 1)

    assert m.get_pair('a', 'b') == 1
    assert m.get_pair('b', 'a') == 1
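The order-independence asserted here is easy to picture: one plausible
implementation (a sketch, not necessarily textplot's actual code) keys each
pair on a sorted tuple, so ('a', 'b') and ('b', 'a') collapse to one slot:

class PairMatrix:

    """
    Sketch of an order-independent pair store (names are illustrative).
    """

    def __init__(self):
        self.keys = set()
        self.pairs = {}

    def key(self, term1, term2):
        # Sort the terms so ('a', 'b') and ('b', 'a') map to the same key.
        return tuple(sorted((term1, term2)))

    def set_pair(self, term1, term2, value):
        self.keys.update([term1, term2])
        self.pairs[self.key(term1, term2)] = value

    def get_pair(self, term1, term2):
        # dict.get() returns None for unindexed pairs, which also matches
        # the behavior exercised by test_missing_key() above.
        return self.pairs.get(self.key(term1, term2))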
Example #6
def test_update_key_set():

    """
    Keys should be added to a set of stored keys.
    """

    m = Matrix()
    m.set_pair('a', 'b', 1)
    m.set_pair('a', 'c', 2)

    assert m.keys == {'a', 'b', 'c'}
Example #7
def test_index():

    """
    index() should index the Bray-Curtis distances between terms.
    """

    t = Text('aa bb cc')
    m = Matrix()

    m.index(t)

    assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')
    assert m.get_pair('aa', 'cc') == t.score_braycurtis('aa', 'cc')
    assert m.get_pair('bb', 'cc') == t.score_braycurtis('bb', 'cc')
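For reference, Bray-Curtis dissimilarity is available in scipy, and a
similarity score of the kind these assertions compare against is its
complement (a sketch; textplot derives the input vectors from smoothed
term-offset histograms):

import numpy as np
from scipy.spatial.distance import braycurtis

# Two hypothetical smoothed term-density signals sampled over a text.
u = np.array([0.1, 0.4, 0.3, 0.2])
v = np.array([0.2, 0.3, 0.3, 0.2])

# braycurtis() returns sum(|u - v|) / sum(|u + v|), a dissimilarity in
# [0, 1] for non-negative inputs; 1 minus that value is a similarity.
similarity = 1 - braycurtis(u, v)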
Example #8
def test_anchored_pairs():

    """
    For a given anchor term, anchored_pairs() should return an ordered map of
    term -> distance for all other indexed terms.
    """

    t = Text('aa bb cc dd')
    m = Matrix()

    m.index(t)

    pairs = m.anchored_pairs('aa')

    assert list(pairs.keys()) == ['bb', 'cc', 'dd']
    assert pairs['bb'] > pairs['cc'] > pairs['dd']
Example #9
def test_anchored_pairs():

    """
    For a given anchor term, anchored_pairs() should return an ordered map of
    term -> distance for all other indexed terms.
    """

    t = Text("aa bb cc dd")
    m = Matrix()

    m.index(t)

    pairs = m.anchored_pairs("aa")

    assert list(pairs.keys()) == ["bb", "cc", "dd"]
    assert pairs["bb"] > pairs["cc"] > pairs["dd"]
Example #10
def frequent(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):
    """
    Use most frequent terms.
    """

    t = Text.from_file(path)
    m = Matrix(t)

    print('Indexing terms:')
    m.index(t.most_frequent_terms(term_depth), **kwargs)

    g = Skimmer()

    print('Generating graph:')
    g.build(m, skim_depth, d_weights)

    return g
Example #11
def test_term_subset():

    """
    When a subset of terms is passed, just those terms should be indexed.
    """

    t = Text('aa bb cc')
    m = Matrix()

    m.index(t, ['aa', 'bb'])

    # Should index 'aa' and 'bb'.
    assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')

    # Should ignore 'cc'.
    assert not m.get_pair('aa', 'cc')
    assert not m.get_pair('bb', 'cc')
Example #12
def clumpy(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):
    """
    Use "clumpiest" terms.
    """

    t = Text.from_file(path)
    m = Matrix(t)

    print('Indexing terms:')
    # Materialize the keys view before slicing (dict views are not
    # sliceable in Python 3).
    m.index(list(t.densities(**kwargs).keys())[:term_depth], **kwargs)

    g = Skimmer()

    print('Generating graph:')
    g.build(m, skim_depth, d_weights)

    return g
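"Clumpiness" here means how tightly a term's occurrences bunch together in
the text; the slice assumes densities() returns a term -> density mapping
already sorted in descending order. A toy sketch of that selection step
(the values are made up):

from collections import OrderedDict

densities = OrderedDict([('whale', 0.91), ('ahab', 0.88), ('sea', 0.40)])

# Take the N "clumpiest" terms off the top of the sorted mapping.
clumpiest = list(densities.keys())[:2]  # ['whale', 'ahab']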
Example #13
def clumpy(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):

    """
    Use "clumpiest" terms.
    """

    t = Text.from_file(path)
    m = Matrix(t)

    print('Indexing terms:')
    # Materialize the keys view before slicing (dict views are not
    # sliceable in Python 3).
    m.index(list(t.densities(**kwargs).keys())[:term_depth], **kwargs)

    g = Skimmer()

    print('Generating graph:')
    g.build(m, skim_depth, d_weights)

    return g
Example #14
def frequent(path, term_depth=500, skim_depth=10, d_weights=False, **kwargs):

    """
    Use most frequent terms.
    """

    t = Text.from_file(path)
    m = Matrix(t)

    print('Indexing terms:')
    m.index(t.most_frequent_terms(term_depth), **kwargs)

    g = Skimmer()

    print('Generating graph:')
    g.build(m, skim_depth, d_weights)

    return g
Example #15
def build_graph(path,
                term_depth=1000,
                skim_depth=10,
                d_weights=False,
                **kwargs):
    """
    Tokenize a text, index a term matrix, and build out a graph.

    Args:
        path (str): The file path.
        term_depth (int): Consider the N most frequent terms.
        skim_depth (int): Connect each word to the N closest siblings.
        d_weights (bool): If true, give "close" nodes low weights.

    Returns:
        Skimmer: The indexed graph.
    """

    # Tokenize text.
    click.echo('\nTokenizing text...')
    t = Text.from_file(path)
    click.echo('Extracted %d tokens' % len(t.tokens))

    m = Matrix()

    # Index the term matrix.
    click.echo('\nIndexing terms:')
    m.index(t, t.most_frequent_terms(term_depth), **kwargs)

    g = Skimmer()

    # Construct the network.
    click.echo('\nGenerating graph:')
    g.build(t, m, skim_depth, d_weights)

    return g
Example #16
def test_index():
    """
    index() should index the Bray-Curtis distances between terms.
    """

    t = Text('aa bb cc')
    m = Matrix()

    m.index(t)

    assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')
    assert m.get_pair('aa', 'cc') == t.score_braycurtis('aa', 'cc')
    assert m.get_pair('bb', 'cc') == t.score_braycurtis('bb', 'cc')
Example #17
def test_term_subset():
    """
    When a subset of terms is passed, just those terms should be indexed.
    """

    t = Text('aa bb cc')
    m = Matrix()

    m.index(t, ['aa', 'bb'])

    # Should index 'aa' and 'bb'.
    assert m.get_pair('aa', 'bb') == t.score_braycurtis('aa', 'bb')

    # Should ignore 'cc'.
    assert not m.get_pair('aa', 'cc')
    assert not m.get_pair('bb', 'cc')