Code Example #1
import sets


def read_dataset():
    # Load the OCR letter-sequence dataset and one-hot encode its target column.
    dataset = sets.Ocr()
    dataset = sets.OneHot(dataset.target, depth=2)(dataset, columns=['target'])
    # Collapse the trailing image axes into a single flat float feature axis.
    dataset['data'] = dataset.data.reshape(dataset.data.shape[:-2] +
                                           (-1, )).astype(float)
    # Split the examples into 66% training and 34% test data.
    train, test = sets.Split(0.66)(dataset)
    return train, test
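For reference, here is a minimal standalone sketch (plain NumPy, with a made-up array of shape (4, 16, 8, 8)) of what the reshape line above does: shape[:-2] keeps the leading axes while -1 collapses the trailing image height and width into a single feature axis.

import numpy as np

# Hypothetical batch: 4 sequences of 16 images, each 8x8 pixels.
images = np.zeros((4, 16, 8, 8))
# shape[:-2] == (4, 16); appending (-1,) merges the last two axes (8 * 8 = 64).
flat = images.reshape(images.shape[:-2] + (-1,)).astype(float)
print(flat.shape)  # (4, 16, 64)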
Code Example #2
import numpy as np
import sets


def test_split(dataset):
    # Splitting in half must preserve the total number of examples.
    one, two = sets.Split(0.5)(dataset)
    assert len(one) + len(two) == len(dataset)
    # Concatenating both halves must reproduce the original data and targets.
    data = np.concatenate((one.data, two.data))
    target = np.concatenate((one.target, two.target))
    assert (data == dataset.data).all()
    assert (target == dataset.target).all()
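One minimal way to exercise this test, assuming the sets.core.dataset.Dataset constructor shown in Code Example #4 below and purely made-up toy data:

import numpy as np
import sets

# Toy dataset with six examples; the values themselves are arbitrary.
toy = sets.core.dataset.Dataset(
    data=np.arange(12).reshape(6, 2),
    target=np.array([0, 1, 0, 1, 0, 1]))
test_split(toy)  # passes if Split(0.5) preserves example order and size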
Code Example #3
File: tfrnn_varlen.py    Project: darraghdog/rnn_mnist
import sets


def get_dataset():
    """Read dataset and flatten images."""
    dataset = sets.Ocr()
    dataset = sets.OneHot(dataset.target, depth=2)(dataset, columns=['target'])
    # Flatten each image into a one-dimensional float feature vector.
    dataset['data'] = dataset.data.reshape(dataset.data.shape[:-2] +
                                           (-1, )).astype(float)
    train, test = sets.Split(0.66)(dataset)
    return train, test
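A short usage sketch, relying only on the attributes already used above (len() and the .data array), to show what the returned splits look like:

train, test = get_dataset()
print(len(train), len(test))               # roughly a 66/34 split of the examples
print(train.data.shape, train.data.dtype)  # flattened float feature vectors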
Code Example #4
File: main.py    Project: yunfei86/tweet-classifier
import numpy as np
import sets


def split(full, num):
    """Return the num-th partition of 10-fold cross validation (0 to 9).

    full: sets.core.dataset.Dataset object
    num: int
    rtype: (Dataset, Dataset)
    """
    validation_size = len(full) // 10
    data = full.data.tolist()
    target = full.target.tolist()
    length = full.length.tolist()
    # Rotate the examples to the right by num folds so that a different
    # contiguous block lands in the final 10%, which Split(0.9) below
    # returns as the validation set.
    for _ in range(num * validation_size):
        data.insert(0, data.pop())
        target.insert(0, target.pop())
        length.insert(0, length.pop())
    data = np.array(data)
    target = np.array(target)
    length = np.array(length)
    # Rebuild the dataset from the rotated arrays and reattach the lengths.
    full = sets.core.dataset.Dataset(data=data, target=target)
    full['length'] = length
    train, validation = sets.Split(0.9)(full)
    return train, validation
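A driver sketch for running all ten folds with this helper; `full` is assumed to be a Dataset loaded elsewhere, and train_and_evaluate is a placeholder standing in for whatever model the project actually fits:

def train_and_evaluate(train, validation):
    # Placeholder: a real model would be trained on `train` and scored on `validation`.
    return len(validation) / len(train)

scores = []
for fold in range(10):                      # the ten partitions, 0 through 9
    train, validation = split(full, fold)   # `full`: Dataset loaded elsewhere
    scores.append(train_and_evaluate(train, validation))
print(sum(scores) / len(scores))            # average score across folds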