Ejemplo n.º 1
0
def test_cluster1():
    """Cluster the default pipeline's items and check the resulting sizes.

    A ``Cluster`` (epoch=32, size=500, tokenizer ``"tokenize"``) is fed every
    item yielded by ``Pipeline().capture_item()`` and then clustered.  The
    test compares the lengths of ``clusters``, ``unique`` and ``dumps`` and
    the shape of ``vectors`` against ``test_text``, checks that the first
    column of every dump holds ``Item`` instances, and passes those items to
    ``pipeline.dress_item``.
    """
    cluster = Cluster(epoch=32, size=500, tokenizer="tokenize")
    pipeline = Pipeline()
    for captured in pipeline.capture_item():
        cluster.put_item(captured)
    cluster.cluster()

    # Similarity assertions are currently disabled; kept here for reference.
    # assert '캠프' in list(map(list, zip(*cluster.similar('노무현'))))[0]
    # assert '사건' in list(map(list, zip(*cluster.similar('박근혜'))))[0]

    expected_count = len(test_text)

    # One cluster entry and one 500-wide vector row per test text.
    assert len(cluster.clusters) == expected_count
    assert cluster.vectors.shape == (expected_count, 500)

    # There is exactly one dump per unique entry, and never more unique
    # entries than input texts.
    unique_count = len(cluster.unique)
    assert unique_count <= expected_count
    assert unique_count == len(cluster.dumps)

    for dump in cluster.dumps:
        # Transposing the dump must yield exactly three columns; only the
        # first (items) is used below.
        items, _vectors, _counter = zip(*dump)

        assert all(isinstance(entry, Item) for entry in items)

        pipeline.dress_item(items)
Ejemplo n.º 2
0
def test_app3():
    """Run the CSV pipeline through clustering and extraction.

    Items captured from ``PipelineCsv(test_csv)`` are put into a ``Cluster``
    (tokenizer ``"tokenize"``) and clustered.  For every dump index the
    ``Extractor`` result is passed to ``pipe.dress_item``, and finally
    ``cluster.distribution`` is printed.  Apart from the three-column unpack
    of each dump, this test contains no explicit assertions.
    """
    cluster = Cluster(tokenizer="tokenize")
    pipe = PipelineCsv(test_csv)

    for captured in pipe.capture_item():
        cluster.put_item(captured)
    cluster.cluster()

    extractor = Extractor(cluster)
    for idx, dump in enumerate(cluster.dumps):
        # Transposing the dump must yield exactly three columns; the values
        # themselves are not used afterwards.
        items, vectors, counter = (list(column) for column in zip(*dump))

        pipe.dress_item(extractor.dump(idx))

    print(cluster.distribution)
Ejemplo n.º 3
0
def test_app1():
    """Run the file pipeline through clustering and keyword extraction.

    Items captured from ``PipelineFile()`` are put into a ``Cluster``
    (epoch=32, tokenizer ``"stemize"``) and clustered.  For every dump index
    the ``Extractor`` result must expose ``keywords`` as a list before it is
    passed to ``pipeline.dress_item``.
    """
    cluster = Cluster(epoch=32, tokenizer="stemize")
    pipeline = PipelineFile()

    for captured in pipeline.capture_item():
        cluster.put_item(captured)
    cluster.cluster()

    extractor = Extractor(cluster)
    for idx, dump in enumerate(cluster.dumps):
        # Transposing the dump must yield exactly three columns; the values
        # themselves are not used afterwards.
        items, vectors, counter = (list(column) for column in zip(*dump))

        result = extractor.dump(idx)
        assert isinstance(result.keywords, list)

        pipeline.dress_item(result)
Ejemplo n.º 4
0
def test_extractor1():
    """Check what ``Extractor.dump`` returns for each cluster dump.

    Items captured from ``Pipeline()`` are put into a ``Cluster`` (epoch=32,
    tokenizer ``"tokenize"``) and clustered.  For every dump index the
    extracted object must be an ``Item`` whose ``keywords`` attribute is a
    list of exactly 32 entries.
    """
    cluster = Cluster(epoch=32, tokenizer="tokenize")
    pipeline = Pipeline()
    for captured in pipeline.capture_item():
        cluster.put_item(captured)
    cluster.cluster()

    extractor = Extractor(cluster)
    expected_keys = {'items', 'vectors', 'counter', 'center', 'keywords'}

    for idx, dump in enumerate(cluster.dumps):
        # Transposing the dump must yield exactly three columns; the values
        # themselves are not used afterwards.
        items, vectors, counter = (list(column) for column in zip(*dump))

        # NOTE(review): `extractable` is not assigned anywhere in this
        # function; it must be provided by module scope, otherwise this line
        # raises NameError -- confirm against the rest of the file.
        assert expected_keys == set(extractable.s.keys())

        result = extractor.dump(idx)

        assert isinstance(result, Item)
        assert isinstance(result.keywords, list)
        assert len(result.keywords) == 32