Example #1
def random_regression_datasets(n_samples,
                               features=100,
                               outs=1,
                               informative=.1,
                               partition_proportions=(.5, .3),
                               rnd=None,
                               **mk_rgr_kwargs):
    rnd_state = em.get_rand_state(rnd)
    # Build a synthetic regression problem with scikit-learn and split it into
    # train/validation/test partitions. Keyword arguments are used here because
    # newer scikit-learn versions make these options keyword-only.
    X, Y, w = make_regression(n_samples,
                              features,
                              n_informative=int(features * informative),
                              n_targets=outs,
                              random_state=rnd_state,
                              coef=True,
                              **mk_rgr_kwargs)
    if outs == 1:
        Y = np.reshape(Y, (n_samples, 1))

    print('range of Y', np.min(Y), np.max(Y))
    info = utils.merge_dicts(
        {
            'informative': informative,
            'random_seed': rnd,
            'w': w
        }, mk_rgr_kwargs)
    name = em.utils.name_from_dict(info, 'w')
    dt = em.Dataset(X, Y, name=name, info=info)
    datasets = em.Datasets.from_list(redivide_data([dt],
                                                   partition_proportions))
    print('conditioning of X^T X',
          np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
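
A minimal usage sketch (argument values are hypothetical; it assumes the function above is importable together with its numpy, scikit-learn, em, utils, and redivide_data dependencies, and that the returned em.Datasets exposes validation and test splits alongside the train split used above):

splits = random_regression_datasets(n_samples=1000, features=50, outs=1, rnd=0)
# The default partition_proportions=(.5, .3) leave roughly 20% for the test split.
print(splits.train.data.shape)
print(splits.validation.data.shape)
print(splits.test.data.shape)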
Example #2
def all_data(self, partition_proportions=None, seed=None):
    # Gather every loaded MiniImagenet class into one Dataset and, if requested,
    # re-split it into the given partition proportions.
    if not self._loaded_images:
        self.load_all_images()
        while not self.check_loaded_images(600):
            time.sleep(5)
    data, targets = [], []
    for k, c in enumerate(sorted(self._loaded_images)):
        data += list(self._loaded_images[c].values())
        targets += [k] * 600
    if self.info['one_hot_enc']:
        targets = em.to_one_hot_enc(targets,
                                    dimension=len(self._loaded_images))
    _dts = [
        em.Dataset(data=np.stack(data),
                   target=np.array(targets),
                   name='MiniImagenet_full')
    ]
    if seed is not None:  # honour an explicit seed of 0 as well
        np.random.seed(seed)
    if partition_proportions:
        _dts = redivide_data(
            _dts,
            partition_proportions=partition_proportions,
            shuffle=True)
    return em.Datasets.from_list(_dts)
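
A hedged usage sketch: meta_loader below is a hypothetical instance of the (not shown) MiniImagenet loader class that owns this method, and the attribute names are assumed from the em.Dataset constructor arguments used above:

# meta_loader is a hypothetical instance of the enclosing loader class.
splits = meta_loader.all_data(partition_proportions=(.8, .1), seed=0)
print(splits.train.data.shape, splits.train.target.shape)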
Example #3
def random_classification_datasets(n_samples,
                                   features=100,
                                   classes=2,
                                   informative=.1,
                                   partition_proportions=(.5, .3),
                                   rnd=None,
                                   one_hot=True,
                                   **mk_cls_kwargs):
    rnd_state = em.get_rand_state(rnd)
    # Build a synthetic classification problem; the requested fraction of
    # informative features is forwarded to scikit-learn as n_informative.
    X, Y = make_classification(n_samples,
                               features,
                               n_informative=int(features * informative),
                               n_classes=classes,
                               random_state=rnd_state,
                               **mk_cls_kwargs)
    if one_hot:
        Y = utils.to_one_hot_enc(Y)

    print('range of Y', np.min(Y), np.max(Y))
    info = utils.merge_dicts({
        'informative': informative,
        'random_seed': rnd
    }, mk_cls_kwargs)
    name = em.utils.name_from_dict(info, 'w')
    dt = em.Dataset(X, Y, name=name, info=info)
    datasets = em.Datasets.from_list(redivide_data([dt],
                                                   partition_proportions))
    print('conditioning of X^T X',
          np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
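
A minimal usage sketch, analogous to the regression case (argument values are hypothetical; the .target attribute is assumed from the em.Dataset constructor used in Example #2):

splits = random_classification_datasets(n_samples=1000, features=50, classes=3, rnd=0)
print(splits.train.data.shape)    # feature matrix of the training split
print(splits.train.target.shape)  # one-hot targets, since one_hot=True by default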
Example #4
def opt(data_root_folder=None,
        one_hot=True,
        partitions=None,
        shuffle=False,
        seed=None):
    """
    data_folder_name = 'mnist'

    if data_root_folder is None:
        data_root_folder = os.path.join(os.getcwd(), 'DATA')
        if not os.path.exists(data_root_folder):
            os.mkdir(data_root_folder)
    data_folder = os.path.join(data_root_folder, data_folder_name)
    """
    #datasets = se.read_data_semeion()
    datasets = Digit.read_opt()
    train = em.Dataset(datasets.train.images,
                       datasets.train.labels,
                       name="opt")
    validation = em.Dataset(datasets.validation.images,
                            datasets.validation.labels,
                            name="opt")
    test = em.Dataset(datasets.test.images, datasets.test.labels, name="opt")
    res = [train, validation, test]

    if partitions:
        res = redivide_data(res,
                            partition_proportions=partitions,
                            shuffle=shuffle,
                            seed=seed)

    return em.Datasets.from_list(res)
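
A minimal usage sketch (it assumes the Digit.read_opt() reader above is available; the partition proportions are hypothetical):

splits = opt(partitions=(.6, .2), shuffle=True, seed=0)
for split in (splits.train, splits.validation, splits.test):
    print(split.data.shape)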
Example #5
def mnist(folder=None,
          one_hot=True,
          partitions=None,
          filters=None,
          maps=None,
          shuffle=False):
    if not folder:
        folder = MNIST_DIR
    datasets = read_data_sets(folder, one_hot=one_hot)
    train = em.Dataset(datasets.train.images,
                       datasets.train.labels,
                       name='MNIST')
    validation = em.Dataset(datasets.validation.images,
                            datasets.validation.labels,
                            name='MNIST')
    test = em.Dataset(datasets.test.images, datasets.test.labels, name='MNIST')
    res = [train, validation, test]
    if partitions:
        res = redivide_data(res,
                            partition_proportions=partitions,
                            filters=filters,
                            maps=maps,
                            shuffle=shuffle)
        res += [None] * (3 - len(res))  # pad to (train, validation, test)
    return em.Datasets.from_list(res)
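
A minimal usage sketch (MNIST_DIR and read_data_sets come from the surrounding module; the partition proportions are hypothetical and re-split the three default MNIST subsets):

data = mnist(partitions=(.1, .1), shuffle=True)
print(data.train.data.shape, data.validation.data.shape, data.test.data.shape)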