Example #1
0
def test_cluster_ncluster():
    '''
    Check clustering when n_clusters is given explicitly.

    Nine (index, text) pairs are built from three distinct texts, three
    copies each. For every clusterizer under test, the 'result' entries of
    identical texts must be equal and exactly three distinct values must
    appear overall.
    '''
    G1 = 'First set'
    G2 = 'Second group'
    G3 = 'Third cluster'
    # Indices 0-2 carry G1, 3-5 carry G2, 6-8 carry G3.
    data = list(enumerate([G1] * 3 + [G2] * 3 + [G3] * 3))

    for classifier in ('cluster.ward', 'cluster.spectral'):
        baf = Chain().load('data.simple', data)
        baf = baf.process('vectorize.sklearn')
        baf = baf.process(classifier, n_clusters=3)

        result = baf.get_chain('result')

        # Each consecutive triple comes from the same text.
        for start in (0, 3, 6):
            assert result[start] == result[start + 1] == result[start + 2]

        assert len(set(result)) == 3
Example #2
0
def test_cluster_ncluster():
    '''
    Three groups of identical texts must end up in three clusters.

    n_clusters=3 is passed to each clusterizer; the 'result' values are then
    checked group by group and for the total number of distinct values.
    '''
    G1 = 'First set'
    G2 = 'Second group'
    G3 = 'Third cluster'
    texts = (G1,) * 3 + (G2,) * 3 + (G3,) * 3
    data = [(index, text) for index, text in enumerate(texts)]

    CLASSIFIERS = ('cluster.ward', 'cluster.spectral')
    for classifier in CLASSIFIERS:
        loaded = Chain().load('data.simple', data)
        vectorized = loaded.process('vectorize.sklearn')
        baf = vectorized.process(classifier, n_clusters=3)

        result = baf.get_chain('result')

        # Entries 0-2, 3-5 and 6-8 each come from one repeated text.
        assert result[0] == result[1] == result[2]
        assert result[3] == result[4] == result[5]
        assert result[6] == result[7] == result[8]

        distinct = set(result)
        assert len(distinct) == 3
Example #3
0
def test_core_data():
    """
    Load two dummy rows and check they come back unchanged.

    The rows are loaded with "data.simple" and read back through the
    ``data`` attribute of the object stored under "base_data".
    """
    expected = ((0, "0"), (1, "1"))

    chain = Chain()
    chain.load("data.simple", expected)

    base_data = chain.get("base_data")
    assert base_data.data == expected
Example #4
0
def test_core_chain():
    '''
    Create a processor and test if
    .get on processor will propagate the call.

    Steps checked, in order:
      1. ``get`` on an unknown key raises KeyError.
      2. A key written into ``Chain.data`` is returned by ``get``.
      3. After loading data, 'base_data' is reachable both from the last
         chain element and through ``get_chain``.
      4. A key stored in the last element's ``_data`` is returned by
         ``get_chain``.
    '''
    data = (
        (0, '0'),
        (1, '1'),
    )
    test_subject = Chain()

    # Only the lookup goes inside the raises block: a comparison wrapped
    # around it could never be evaluated once the KeyError is raised.
    with pytest.raises(KeyError):
        test_subject.get('test')

    test_subject.data['test'] = 'ok'

    assert test_subject.get('test') == 'ok'

    test_subject.load('data.simple', data)

    # get data from the main class through the chain
    assert test_subject.chain[-1].get('base_data').data == data
    assert test_subject.get_chain('base_data').data == data

    # get data from the chain
    test_subject.chain[-1]._data['last_data'] = 'last_data'
    assert test_subject.get_chain('last_data') == 'last_data'
Example #5
0
def test_core_data():
    '''
    Load dummy rows with 'data.simple' and verify them.

    The object stored under 'base_data' must expose the same rows through
    its ``data`` attribute.
    '''
    rows = ((0, '0'), (1, '1'))

    chain = Chain()
    chain.load('data.simple', rows)

    stored = chain.get('base_data')
    assert stored.data == rows
Example #6
0
def test_cluster():
    '''
    Build a load -> vectorize -> ward-cluster chain and inspect it.

    Checks the type of each chain element, that 'result' has one entry per
    input row, and that the same result is reachable through ``get_chain``,
    through the last element's ``get`` and through its ``_data`` dict.
    '''
    data = (
        (0, 'data test 1'),
        (1, 'Data test 2'),
        (2, 'other data test 3.'),
    )
    test_subject = Chain().load('data.simple', data)
    test_subject = test_subject.process('vectorize.sklearn')
    test_subject = test_subject.process('cluster.ward')

    # One chain element per step, in the order the steps were added.
    expected_types = (SimpleDataSource, CountVectorizer, WardClusterizer)
    for position, expected_type in enumerate(expected_types):
        assert isinstance(test_subject.chain[position], expected_type)

    result = test_subject.get_chain('result')
    assert len(result) == len(data)

    last_step = test_subject.chain[-1]
    assert result.tolist() == last_step.get('result').tolist()
    assert result.tolist() == last_step._data['result'].tolist()
Example #7
0
def test_cluster():
    '''
    Run three rows through 'data.simple', 'vectorize.sklearn' and
    'cluster.ward', then verify the chain layout and its 'result'.
    '''
    data = (
        (0, 'data test 1'),
        (1, 'Data test 2'),
        (2, 'other data test 3.'),
    )
    loaded = Chain().load('data.simple', data)
    vectorized = loaded.process('vectorize.sklearn')
    test_subject = vectorized.process('cluster.ward')

    # Chain elements appear in the order the steps were requested.
    source_step, vectorizer_step, cluster_step = (
        test_subject.chain[0],
        test_subject.chain[1],
        test_subject.chain[2],
    )
    assert isinstance(source_step, SimpleDataSource)
    assert isinstance(vectorizer_step, CountVectorizer)
    assert isinstance(cluster_step, WardClusterizer)

    result = test_subject.get_chain('result')
    assert len(result) == len(data)

    # The same result must be visible from the last chain element, both
    # through get() and directly in its _data dict.
    via_get = test_subject.chain[-1].get('result')
    assert result.tolist() == via_get.tolist()
    via_dict = test_subject.chain[-1]._data['result']
    assert result.tolist() == via_dict.tolist()
Example #8
0
def test_data_simple():
    '''
    Load three (uid, text) rows with 'data.simple' and check the stored data.

    'base_data' and 'main_data' must compare equal, ``get_data`` must return
    the texts and ``get_uids`` the uids, both in input order.
    '''
    rows = (
        (0, 'data test 1'),
        (1, 'Data test 2'),
        (2, 'other data test 3.'),
    )
    chain = Chain().load('data.simple', rows)
    base_data = chain.data['base_data']

    assert chain.data['base_data'] == chain.data['main_data']

    expected_texts = ['data test 1', 'Data test 2', 'other data test 3.']
    assert base_data.get_data() == expected_texts

    expected_uids = [0, 1, 2]
    assert base_data.get_uids() == expected_uids
Example #9
0
def test_core_data_chain():
    '''
    When data is loaded twice, the most recent load must win.

    After each 'data.simple' load, ``get_chain('data_source')`` has to
    expose exactly the rows that were just loaded.
    '''
    data = ((0, '0'), (1, '1'))
    data_new = ((0, "new"), (1, "new"))

    chain = Chain()
    # Load the original rows first, then the replacement rows.
    for payload in (data, data_new):
        chain.load('data.simple', payload)
        assert chain.get_chain('data_source').data == payload
Example #10
0
def test_core_data_chain():
    """
    Overriding data: the last loaded data source is the one returned.
    """
    first_rows = ((0, "0"), (1, "1"))
    second_rows = ((0, "new"), (1, "new"))

    chain = Chain()

    chain.load("data.simple", first_rows)
    current_source = chain.get_chain("data_source")
    assert current_source.data == first_rows

    # A second load must replace what get_chain("data_source") returns.
    chain.load("data.simple", second_rows)
    current_source = chain.get_chain("data_source")
    assert current_source.data == second_rows
Example #11
0
def test_core_load_json():
    '''
    Load a chain from the data1.json file next to this test module.

    Checks the 'lang' entry of the chain data and the rows held by the last
    data source.
    '''
    here = os.path.dirname(__file__)
    json_path = os.path.join(here, "data1.json")
    baf = Chain.load_json(path=json_path)

    expected_rows = [
        (1, [u'ceci', u'\xeatre', u'un', u'test', u'.']),
        (2, [u'cette', u'phrase', u'\xeatre', u'un', u'autre', u'test', u'.']),
    ]

    assert baf.data['lang'] == 'fr'
    last_source = baf.data_sources[-1]
    assert last_source.data == expected_rows
Example #12
0
def test_data_save_load():
    """
    Save a vectorized chain with "pickle.save" and reload it.

    The vectorizer of the reloaded chain must transform ("test",) into the
    same row as the original vectorizer, and that row must be [0, 0, 1].
    """
    rows = (
        (0, "data test 1"),
        (1, "Data test 2"),
        (2, "other data test 3."),
    )
    original = Chain().load("data.simple", rows)
    # Lookup kept from the original test; the value is not used below.
    base_data = original.data["base_data"]
    original.process("vectorize.sklearn")

    probe = ("test",)
    with tempfile.TemporaryDirectory() as workdir:
        pickle_path = os.path.join(workdir, "save.pkl")
        original.process("pickle.save", pickle_path)
        reloaded = Chain().load("pickle.load", pickle_path)

        reloaded_row = reloaded.get_chain("vectorizer").transform(probe).toarray()[0].tolist()
        original_row = original.get_chain("vectorizer").transform(probe).toarray()[0].tolist()

        assert reloaded_row == original_row == [0, 0, 1]
Example #13
0
def test_core_load_yaml():
    '''
    Load a chain from the data2.yaml file next to this test module.

    Checks the 'lang' entry of the chain data and the rows held by the last
    data source.
    '''
    yaml_path = os.path.join(os.path.dirname(__file__), "data2.yaml")
    baf = Chain.load_yaml(path=yaml_path)

    assert baf.data['lang'] == 'en'
    # No debugging print here: on failure the assertion below already
    # reports the compared values.
    assert baf.data_sources[-1].data == [
        (1, [u'this', u'be', u'a', u'test', u'.']),
        (2, [u'another', u'test', u'sentence', u'.']),
        (3, [u'yet', u'another', u'sentence', u'.']),
        (4, [u'two', u'sentence', u'this', u'time', u'.']),
    ]
Example #14
0
def test_core_chain():
    """
    Create a processor and test if
    .get on processor will propagate the call.

    Steps checked, in order:
      1. ``get`` on an unknown key raises KeyError.
      2. A key written into ``Chain.data`` is returned by ``get``.
      3. After loading data, "base_data" is reachable both from the last
         chain element and through ``get_chain``.
      4. A key stored in the last element's ``_data`` is returned by
         ``get_chain``.
    """
    data = ((0, "0"), (1, "1"))
    test_subject = Chain()

    # Only the lookup goes inside the raises block: a comparison wrapped
    # around it could never be evaluated once the KeyError is raised.
    with pytest.raises(KeyError):
        test_subject.get("test")

    test_subject.data["test"] = "ok"

    assert test_subject.get("test") == "ok"

    test_subject.load("data.simple", data)

    # get data from the main class through the chain
    assert test_subject.chain[-1].get("base_data").data == data
    assert test_subject.get_chain("base_data").data == data

    # get data from the chain
    test_subject.chain[-1]._data["last_data"] = "last_data"
    assert test_subject.get_chain("last_data") == "last_data"
Example #15
0
def fixture():
    '''
    Build a processor chain preloaded with three test rows.

    Returns whatever ``Chain().load('data.simple', ...)`` returns.
    '''
    rows = (
        (0, 'data test 1'),
        (1, 'Data test 2'),
        (2, 'other data test 3.'),
    )
    chain = Chain()
    return chain.load('data.simple', rows)