예제 #1
0
def read_dataset():
    """Load the OCR dataset with one-hot targets and flattened images.

    Returns a (train, test) pair produced by a 66/34 split.
    """
    ocr = sets.Ocr()
    # Encode the string labels as depth-2 one-hot vectors.
    ocr = sets.OneHot(ocr.target, depth=2)(ocr, columns=['target'])
    # Collapse the trailing two image axes into a single feature axis.
    flat_shape = ocr.data.shape[:-2] + (-1,)
    ocr['data'] = ocr.data.reshape(flat_shape).astype(float)
    return sets.Split(0.66)(ocr)
예제 #2
0
def get_dataset():
    """Read the OCR dataset, flatten its images, and split into train/test."""
    ocr = sets.Ocr()
    # Targets become depth-2 one-hot vectors in place of string labels.
    ocr = sets.OneHot(ocr.target, depth=2)(ocr, columns=['target'])
    # Merge the last two (image) dimensions into one flat feature vector.
    new_shape = ocr.data.shape[:-2] + (-1,)
    ocr['data'] = ocr.data.reshape(new_shape).astype(float)
    return sets.Split(0.66)(ocr)
def test_embedding_found():
    """OneHot reports the fraction of tokens covered by the vocabulary.

    Of the 10 tokens across both columns, 6 appear in 'abc',
    so the found ratio must be 6/10.
    """
    vocabulary = list('abc')
    example = sets.Dataset(data=list('ceabb'), target=list('abddd'))
    encoded, found = sets.OneHot(vocabulary)(
        example, columns=['data', 'target'], return_found=True)
    assert found == 6 / 10
예제 #4
0
파일: test_dataset.py 프로젝트: pkuhn/sets
def test_semeval():
    """Smoke-test the full SemEval relation preprocessing pipeline."""
    pipeline = sets.SemEvalRelation()
    # Tokenize sentences, then one-hot encode the relation labels.
    pipeline = sets.Tokenize()(pipeline)
    pipeline = sets.OneHot(pipeline.target)(pipeline, columns=['target'])
    # Distances of each token to the two entity markers, depth-2 encoded.
    pipeline = sets.WordDistance('<e1>', '<e2>', depth=2)(
        pipeline, column='data')
    # Embed tokens with 100-dimensional GloVe vectors.
    pipeline = sets.Glove(100, depth=2)(pipeline, columns=['data'])
    # Stack embeddings and distance features along axis 2.
    pipeline = sets.Concat(2, 'data')(
        pipeline, columns=('data', 'word_distance'))
예제 #5
0
def test_onehot(dataset):
    """One-hot targets have one column per class and exactly one hot row entry."""
    encoded = sets.OneHot(dataset.target)(dataset, columns=['target'])
    num_classes = len(np.unique(dataset.target))
    assert encoded.target.shape[1] == num_classes
    # Each row sums to a nonzero value and has a nonzero maximum,
    # i.e. every example received at least one hot entry.
    assert encoded.target.sum(axis=1).all()
    assert encoded.target.max(axis=1).all()
예제 #6
0
파일: test_dataset.py 프로젝트: pkuhn/sets
def test_ocr():
    dataset = sets.Ocr()
    dataset = sets.OneHot(dataset.target, depth=2)(dataset, columns=['target'])