예제 #1
0
파일: test_cluster.py 프로젝트: moas/bakfu
def test_cluster_ncluster():
    '''
    Check that three groups of identical texts fall into three clusters.

    Each classifier is run with an explicit ``n_clusters=3``. The three
    copies of every text must share one label, and exactly three distinct
    labels must come out overall.
    '''
    classifier_names = ('cluster.ward', 'cluster.spectral')

    texts = ('First set', 'Second group', 'Third cluster')
    # Three consecutive copies of each text, each paired with a running uid.
    data = [(uid, texts[uid // 3]) for uid in range(9)]

    for name in classifier_names:
        pipeline = Chain().load('data.simple', data)
        pipeline = pipeline.process('vectorize.sklearn')
        pipeline = pipeline.process(name, n_clusters=3)

        labels = pipeline.get_chain('result')

        # Items 0-2, 3-5 and 6-8 hold the same text, so every triple has to
        # carry a single label.
        for start in (0, 3, 6):
            assert labels[start] == labels[start + 1] == labels[start + 2]

        assert len(set(labels)) == 3
예제 #2
0
파일: test_core.py 프로젝트: moas/bakfu
def test_core_chain():
    '''
    Check key lookup on a Chain, directly and through its processors.

    ``get`` must raise KeyError for a key that was never stored and return
    values placed in the chain's own ``data``. After loading a data source,
    both the last processor and ``get_chain`` must give access to that
    source, and ``get_chain`` must also find keys stored on the last
    processor itself.
    '''
    data = (
        (0, '0'),
        (1, '1'),
    )
    test_subject = Chain()

    # Nothing has been stored yet, so the lookup itself must raise; there is
    # no return value to compare against.
    with pytest.raises(KeyError):
        test_subject.get('test')

    test_subject.data['test'] = 'ok'

    assert test_subject.get('test') == 'ok'

    test_subject.load('data.simple', data)

    # get data from the main class through the chain
    assert test_subject.chain[-1].get('base_data').data == data
    assert test_subject.get_chain('base_data').data == data

    # get data from the chain
    test_subject.chain[-1]._data['last_data'] = 'last_data'
    assert test_subject.get_chain('last_data') == 'last_data'
예제 #3
0
def test_data_simple():
    '''
    Load three (uid, text) records and inspect the stored data source.

    'base_data' and 'main_data' must compare equal, and the source must
    return the texts and the uids in the order they were supplied.
    '''
    records = (
        (0, 'data test 1'),
        (1, 'Data test 2'),
        (2, 'other data test 3.'),
    )
    test_subject = Chain().load('data.simple', records)
    source = test_subject.data['base_data']

    assert test_subject.data['base_data'] == test_subject.data['main_data']

    expected_texts = [text for _, text in records]
    expected_uids = [uid for uid, _ in records]
    assert source.get_data() == expected_texts
    assert source.get_uids() == expected_uids
예제 #4
0
파일: test_core.py 프로젝트: moas/bakfu
def test_core_data():
    '''
    Load two dummy records and check they are stored under 'base_data'.
    '''
    records = ((0, '0'), (1, '1'))

    chain = Chain()
    chain.load('data.simple', records)

    loaded = chain.get('base_data')
    assert loaded.data == records
예제 #5
0
파일: test_core.py 프로젝트: moas/bakfu
def test_core_data_chain():
    '''
    Loading a second data source must supersede the first one.

    After every load, 'data_source' resolved through the chain has to
    expose the data that was loaded most recently.
    '''
    first = ((0, '0'), (1, '1'))
    second = ((0, "new"), (1, "new"))

    test_subject = Chain()
    # Load each payload in turn; the lookup must always follow the latest.
    for payload in (first, second):
        test_subject.load('data.simple', payload)
        assert test_subject.get_chain('data_source').data == payload
예제 #6
0
파일: test_cluster.py 프로젝트: moas/bakfu
def test_cluster():
    '''
    Run load -> vectorize -> ward clustering and inspect the chain.

    The chain must hold one processor of the expected type per step, the
    result must have one entry per input record, and the result must be
    the same whether read through the chain or from the last processor.
    '''
    data = ((0, 'data test 1'), (1, 'Data test 2'), (2, 'other data test 3.'))

    test_subject = Chain().load('data.simple', data)
    test_subject = test_subject.process('vectorize.sklearn')
    test_subject = test_subject.process('cluster.ward')

    expected_types = (SimpleDataSource, CountVectorizer, WardClusterizer)
    for position, expected in enumerate(expected_types):
        assert isinstance(test_subject.chain[position], expected)

    result = test_subject.get_chain('result')
    assert len(result) == len(data)

    last_step = test_subject.chain[-1]
    as_list = result.tolist()
    assert as_list == last_step.get('result').tolist()
    assert as_list == last_step._data['result'].tolist()
예제 #7
0
def fixture():
    '''
    Build a Chain and load three sample (uid, text) records into it.

    Returns whatever ``Chain.load`` returns for the 'data.simple' source.
    '''
    sample = (
        (0, 'data test 1'),
        (1, 'Data test 2'),
        (2, 'other data test 3.'),
    )
    chain = Chain()
    return chain.load('data.simple', sample)