Ejemplo n.º 1
0
def test_app3():
    """Run the cluster/extract round trip over items read via PipelineCsv.

    Every item captured from the pipeline built on ``test_csv`` is put into
    a ``Cluster`` created with the "tokenize" tokenizer.  After clustering,
    each dump is extracted by its index and handed back to the pipeline
    through ``dress_item``.  The cluster distribution is printed at the end.
    """
    clusterer = Cluster(tokenizer="tokenize")
    csv_pipeline = PipelineCsv(test_csv)

    for record in csv_pipeline.capture_item():
        clusterer.put_item(record)
    clusterer.cluster()

    extractor = Extractor(clusterer)
    for index, members in enumerate(clusterer.dumps):
        # The three lists are not used afterwards; the unpacking still
        # raises unless the dump transposes into exactly three columns.
        _items, _vectors, _counter = (list(column) for column in zip(*members))

        csv_pipeline.dress_item(extractor.dump(index))
    print(clusterer.distribution)
Ejemplo n.º 2
0
def test_app1():
    """Run the cluster/extract round trip over items read via PipelineFile.

    Items captured from a default ``PipelineFile`` are put into a ``Cluster``
    created with ``epoch=32`` and the "stemize" tokenizer.  After clustering,
    each dump is extracted by its index, its ``keywords`` attribute is
    checked to be a list, and the result is handed back to the pipeline
    through ``dress_item``.
    """
    clusterer = Cluster(epoch=32, tokenizer="stemize")
    file_pipeline = PipelineFile()

    for record in file_pipeline.capture_item():
        clusterer.put_item(record)
    clusterer.cluster()

    extractor = Extractor(clusterer)
    for index, members in enumerate(clusterer.dumps):
        # The three lists are not used afterwards; the unpacking still
        # raises unless the dump transposes into exactly three columns.
        _items, _vectors, _counter = (list(column) for column in zip(*members))

        result = extractor.dump(index)
        assert isinstance(result.keywords, list)

        file_pipeline.dress_item(result)
Ejemplo n.º 3
0
def test_extractor1():
    """Check the type and keyword count of every dump the Extractor returns.

    Items captured from a default ``Pipeline`` are put into a ``Cluster``
    created with ``epoch=32`` and the "tokenize" tokenizer.  After
    clustering, each dump extracted by index must be an ``Item`` whose
    ``keywords`` attribute is a list of length 32.
    """
    clusterer = Cluster(epoch=32, tokenizer="tokenize")
    source = Pipeline()
    for record in source.capture_item():
        clusterer.put_item(record)
    clusterer.cluster()

    extractor = Extractor(clusterer)
    expected_keys = {'items', 'vectors', 'counter', 'center', 'keywords'}

    for index, members in enumerate(clusterer.dumps):
        # The three lists are not used afterwards; the unpacking still
        # raises unless the dump transposes into exactly three columns.
        _items, _vectors, _counter = (list(column) for column in zip(*members))

        # NOTE(review): `extractable` is not bound anywhere in this function;
        # presumably it is a module-level name -- confirm, otherwise this
        # line raises NameError on the first iteration.
        assert expected_keys == set(extractable.s.keys())

        result = extractor.dump(index)

        assert isinstance(result, Item)
        assert isinstance(result.keywords, list)
        assert 32 == len(result.keywords)