Example #1
 def set_iter(self):
     """Prepare data iterator for training and validation"""
     self.train_iter = io.NDArrayIter(self.im[self.train_idx],
                                      self.lab[self.train_idx],
                                      batch_size=self.train_batch_size, shuffle=True)
     self.val_iter = io.NDArrayIter(self.im[self.val_idx],
                                    self.lab[self.val_idx],
                                    batch_size=self.val_batch_size)
Example #2
def set_iter(opts):
    """data iterator"""
    train_iter = io.NDArrayIter(data=np.arange(opts.train_amount),
                                shuffle=True,
                                label=np.zeros((opts.train_amount, )),
                                batch_size=opts.batch_size)
    val_iter = io.NDArrayIter(data=np.arange(opts.val_amount),
                              label=np.zeros((opts.val_amount, )),
                              batch_size=opts.batch_size)
    return train_iter, val_iter
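For reference, a minimal sketch of how iterators like the ones returned above are typically consumed (assuming the usual aliases, import numpy as np and from mxnet import io): each step yields an io.DataBatch whose data and label attributes are lists of NDArrays, and reset() rewinds the iterator between passes.

import numpy as np
from mxnet import io

train_iter = io.NDArrayIter(data=np.arange(100),
                            label=np.zeros((100,)),
                            batch_size=10,
                            shuffle=True)
for epoch in range(2):
    train_iter.reset()            # rewind the cursor before each pass
    for batch in train_iter:      # batch is an io.DataBatch
        x = batch.data[0]         # NDArray holding this minibatch
        y = batch.label[0]        # the matching labels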
Example #3
    def fit(self,
            train_set,
            batch_size=10,
            num_epochs=10,
            gibbs_sampling_steps=1,
            learning_rate=0.01):
        """ Fit the model to the training data.
        :param train_set: training set
        """
        assert isinstance(train_set, nd.NDArray)
        assert len(train_set.shape) == 2
        assert train_set.shape[1] == self.weights.shape[0]

        train_set = train_set.reshape(
            (train_set.shape[0], 1, train_set.shape[1]))

        for _ in range(num_epochs):
            # For each epoch, shuffle the training set and train batch by batch.
            for batch in io.NDArrayIter(data=train_set,
                                        shuffle=True,
                                        batch_size=batch_size,
                                        last_batch_handle='discard'):
                self._train_batch(batch.data[0], gibbs_sampling_steps,
                                  learning_rate)
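A detail worth noting in the loop above: NDArrayIter shuffles its data once, when it is constructed, not on reset(), so building a fresh iterator inside the epoch loop produces a new shuffle every epoch, while last_batch_handle='discard' drops any trailing batch smaller than batch_size. A minimal, hypothetical call to this method (the class name RBM and its constructor arguments are assumptions; the source only shows fit):

from mxnet import nd

model = RBM(num_visible=784, num_hidden=64)       # hypothetical constructor
train_set = nd.random.uniform(shape=(1000, 784))  # 1000 samples, 784 features
model.fit(train_set, batch_size=32, num_epochs=5,
          gibbs_sampling_steps=1, learning_rate=0.01)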
Example #4
 def __init__(self, X, Y, batch_size=1, shuffle=True, ctx=mx.cpu()):
     # Transpose images from NHWC to NCHW, then shard data and labels across
     # the given contexts. Note: `ctx` is forwarded as `ctx_list`, so callers
     # should pass a list of contexts (e.g. [mx.cpu()]) rather than the
     # single-context default.
     self.data_iter = io.NDArrayIter(
         data=gluon.utils.split_and_load(np.transpose(X, [0, 3, 1, 2]),
                                         ctx_list=ctx,
                                         batch_axis=0),
         label=gluon.utils.split_and_load(Y, ctx_list=ctx, batch_axis=0),
         batch_size=batch_size,
         shuffle=shuffle)
     self.len = len(X)
Example #5
def SiftSmallIter(dataPath, trainNum, valNum, batchSize):
    data = ReadFvecs(dataPath, "siftsmall_learn.fvecs")
    data = data.astype(npy.float32) * 0.01
    ndata = data.shape[0]
    ntrain = npy.minimum(trainNum, 20000)
    nval = npy.minimum(valNum, 5000)
    idxRand = npy.arange(ndata)
    npy.random.shuffle(idxRand)
    trainIter = mxio.NDArrayIter(
        data=data[idxRand[:ntrain], :],
        batch_size=batchSize,
        shuffle=True,
        last_batch_handle="discard")
    valIter = mxio.NDArrayIter(
        data=data[idxRand[ntrain:ntrain + nval], :],
        batch_size=batchSize,
        shuffle=False,
        last_batch_handle="discard")
    return (trainIter, valIter)
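A hypothetical call to the function above (the dataset path is a placeholder): it reads the siftsmall_learn.fvecs vectors, scales them by 0.01, and splits a random permutation into at most 20000 training and 5000 validation rows.

trainIter, valIter = SiftSmallIter("data/siftsmall",  # placeholder path
                                   trainNum=20000,
                                   valNum=5000,
                                   batchSize=100)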
Example #6
    def get_batch_data(data: DataFrame,
                       cols: dict,
                       workers,
                       batch_size,
                       style: str,
                       is_test=False,
                       shuffle=True):
        in_data = data[cols["continuous"] + cols["categorical"]["numeric"] +
                       cols["categorical"]["string"]].values.astype(float32)

        if style == "imperative":
            if is_test:
                data_set = gluon.data.ArrayDataset(in_data)
            else:
                target_data = data[cols["target"]].values.astype(float32)
                data_set = gluon.data.ArrayDataset(in_data, target_data)

            return gluon.data.DataLoader(dataset=data_set,
                                         batch_size=batch_size,
                                         shuffle=shuffle,
                                         num_workers=workers,
                                         thread_pool=True)

        elif style == "symbolic":
            if is_test:
                data_iter = io.NDArrayIter(data=in_data,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           data_name="data")
            else:
                target_data = data[cols["target"]].values.astype(float32)
                data_iter = io.NDArrayIter(data=in_data,
                                           label=target_data,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           data_name="data",
                                           label_name="target",
                                           last_batch_handle="roll_over")
            return data_iter
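The two branches above return different iterator types: the "imperative" path yields a gluon DataLoader that produces plain arrays, while the "symbolic" path yields an io.NDArrayIter that produces DataBatch objects whose arrays are registered under the names given by data_name and label_name (and, for training, last_batch_handle='roll_over' carries a short final batch over into the next pass). A small sketch of consuming the symbolic iterator, assuming data_iter was built as above:

for batch in data_iter:
    x = batch.data[0]      # the array registered under data_name="data"
    y = batch.label[0]     # the array registered under label_name="target"
data_iter.reset()          # required before iterating again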
Example #7
 def _init_iter(self, X, y, is_train):
     """Initialize the iterator given input."""
     if isinstance(X, (np.ndarray, nd.NDArray)):
         if y is None:
             if is_train:
                 raise ValueError('y must be specified when X is numpy.ndarray')
             else:
                 y = np.zeros(X.shape[0])
         if not isinstance(y, (np.ndarray, nd.NDArray)):
             raise TypeError('y must be ndarray when X is numpy.ndarray')
         if X.shape[0] != y.shape[0]:
             raise ValueError("The numbers of data points and labels not equal")
         if y.ndim == 2 and y.shape[1] == 1:
             y = y.flatten()
         if y.ndim != 1:
             raise ValueError("Label must be 1D or 2D (with 2nd dimension being 1)")
         if is_train:
             return io.NDArrayIter(X, y, min(X.shape[0], self.numpy_batch_size),
                                   shuffle=is_train, last_batch_handle='roll_over')
         else:
             return io.NDArrayIter(X, y, min(X.shape[0], self.numpy_batch_size), shuffle=False)
     if not isinstance(X, io.DataIter):
         raise TypeError('X must be DataIter, NDArray or numpy.ndarray')
     return X
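Two small points about _init_iter: the batch size is capped at the number of samples via min(X.shape[0], self.numpy_batch_size), and shuffling plus roll_over handling apply only when is_train is true. A sketch of the three accepted inputs (the model instance m and its numpy_batch_size are assumptions):

X = np.random.rand(50, 10).astype(np.float32)
y = (np.arange(50) % 2).astype(np.float32)
train_it = m._init_iter(X, y, is_train=True)     # shuffled NDArrayIter
test_it = m._init_iter(X, None, is_train=False)  # dummy zero labels filled in
passthru = m._init_iter(train_it, None, is_train=True)  # DataIter passes through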
Example #8
    def dataloader(self, data, batch_size, shuffle=True):
        '''
        Constructs a data loader for generating minibatches of data.

        Args
        ----
        data: numpy array or MXNet NDArray, no default
          The data from which to load minibatches.
        batch_size: integer, no default
          The number of samples returned in each minibatch.
        shuffle: boolean, default True
          Whether or not to shuffle the data prior to returning the data loader.

        Returns
        -------
        A gluon DataLoader (for numpy input), an io.NDArrayIter otherwise,
        or None if data is None.
        '''
        if data is None:
            return None
        else:
            # inds = np.arange(data.shape[0])
            # if shuffle:
            #     np.random.shuffle(inds)
            # ordered = data[inds]
            # N, r = divmod(data.shape[0], batch_size)
            # if r > 0:
            #     ordered = np.vstack([ordered, ordered[:r]])
            if type(data) is np.ndarray:
                # NumPy input: wrap it in a gluon DataLoader
                return gluon.data.DataLoader(data,
                                             batch_size,
                                             last_batch='discard',
                                             shuffle=shuffle)
            else:
                # otherwise (e.g. an MXNet NDArray): use an NDArrayIter
                return io.NDArrayIter(data={'data': data},
                                      batch_size=batch_size,
                                      shuffle=shuffle,
                                      last_batch_handle='discard')
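Note that the two branches yield different batch types, which is why consumers such as save_latent in Example #9 below check for io.DataBatch before unpacking. A minimal sketch of handling both (the loader variable is assumed to come from the dataloader method above):

for batch in loader:
    if isinstance(batch, io.DataBatch):  # produced by io.NDArrayIter
        x = batch.data[0]
    else:                                # produced by gluon.data.DataLoader
        x = batch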
Example #9
    def save_latent(self, saveto):
        before_softmax = True
        try:
            if type(self.data.data['train']) is np.ndarray:
                dataset_train = gluon.data.dataset.ArrayDataset(self.data.data['train'])
                train_data = gluon.data.DataLoader(dataset_train, self.args['batch_size'], shuffle=False, last_batch='discard')

                dataset_val = gluon.data.dataset.ArrayDataset(self.data.data['valid'])
                val_data = gluon.data.DataLoader(dataset_val, self.args['batch_size'], shuffle=False, last_batch='discard')

                dataset_test = gluon.data.dataset.ArrayDataset(self.data.data['test'])
                test_data = gluon.data.DataLoader(dataset_test, self.args['batch_size'], shuffle=False, last_batch='discard')
            else:
                train_data = io.NDArrayIter(data={'data': self.data.data['train']}, batch_size=self.args['batch_size'],
                                            shuffle=False, last_batch_handle='discard')
                val_data = io.NDArrayIter(data={'data': self.data.data['valid']}, batch_size=self.args['batch_size'],
                                            shuffle=False, last_batch_handle='discard')
                test_data = io.NDArrayIter(data={'data': self.data.data['test']}, batch_size=self.args['batch_size'],
                                            shuffle=False, last_batch_handle='discard')
        except Exception:
            print("Loading error during save_latent, probably caused by a missing validation or test set.")
            return

        train_output = np.zeros((self.data.data['train'].shape[0], self.ndim_y))
        # train_label_output = np.zeros(self.data.data['train'].shape[0])
        # for i, (data, label) in enumerate(train_data):
        for i, data in enumerate(train_data):
            if type(data) is io.DataBatch:
                data = data.data[0].as_in_context(self.model_ctx)
            else:
                data = data.as_in_context(self.model_ctx)
            if before_softmax:
                output = self.Enc(data)
            else:
                output = nd.softmax(self.Enc(data))
            train_output[i*self.args['batch_size']:(i+1)*self.args['batch_size']] = output.asnumpy()
            # train_label_output[i*self.args['batch_size']:(i+1)*self.args['batch_size']] = label.asnumpy()
        train_output = np.delete(train_output, np.s_[(i+1)*self.args['batch_size']:], 0)
        # train_label_output = np.delete(train_label_output, np.s_[(i+1)*self.args['batch_size']:])
        np.save(os.path.join(saveto, self.args['domain']+'train_latent.npy'), train_output)
        # np.save(os.path.join(saveto, self.args['domain']+'train_latent_label.npy'), train_label_output)

        val_output = np.zeros((self.data.data['valid'].shape[0], self.ndim_y))
        # train_label_output = np.zeros(self.data.data['train'].shape[0])
        # for i, (data, label) in enumerate(train_data):
        for i, data in enumerate(val_data):
            if type(data) is io.DataBatch:
                data = data.data[0].as_in_context(self.model_ctx)
            else:
                data = data.as_in_context(self.model_ctx)
            if before_softmax:
                output = self.Enc(data)
            else:
                output = nd.softmax(self.Enc(data))
            val_output[i*self.args['batch_size']:(i+1)*self.args['batch_size']] = output.asnumpy()
            # train_label_output[i*self.args['batch_size']:(i+1)*self.args['batch_size']] = label.asnumpy()
        val_output = np.delete(val_output, np.s_[(i+1)*self.args['batch_size']:], 0)
        # train_label_output = np.delete(train_label_output, np.s_[(i+1)*self.args['batch_size']:])
        np.save(os.path.join(saveto, self.args['domain']+'val_latent.npy'), val_output)
        # np.save(os.path.join(saveto, self.args['domain']+'train_latent_label.npy'), train_label_output)

        test_output = np.zeros((self.data.data['test'].shape[0], self.ndim_y))
        # test_label_output = np.zeros(self.data.data['test'].shape[0])
        # for i, (data, label) in enumerate(test_data):
        for i, data in enumerate(test_data):
            if type(data) is io.DataBatch:
                data = data.data[0].as_in_context(self.model_ctx)
            else:
                data = data.as_in_context(self.model_ctx)
            if before_softmax:
                output = self.Enc(data)
            else:
                output = nd.softmax(self.Enc(data))
            test_output[i*self.args['batch_size']:(i+1)*self.args['batch_size']] = output.asnumpy()
            # test_label_output[i*self.args['batch_size']:(i+1)*self.args['batch_size']] = label.asnumpy()
        test_output = np.delete(test_output, np.s_[(i+1)*self.args['batch_size']:], 0)
        # test_label_output = np.delete(test_label_output, np.s_[(i+1)*self.args['batch_size']:])
        np.save(os.path.join(saveto, self.args['domain']+'test_latent.npy'), test_output)
        # np.save(os.path.join(saveto, self.args['domain']+'test_latent_label.npy'), test_label_output)
Example #10
File: test.py Project: minhto2802/T2_ADC
 def set_iter(self):
     """Prepare data iterator for test"""
     self.test_iter = io.NDArrayIter(self.im,
                                     self.lab,
                                     batch_size=self.test_batch_size)

    # (earlier lines of this snippet were lost in extraction; this `else`
    # branch resumes training from a saved checkpoint)
    else:
        net.load_params('%snet-%04d' %
                        (dir_out_checkpoints, opts.resumed_epoch))

    # define trainer
    trainer = gluon.Trainer(net.collect_params(),
                            optimizer=opts.optimizer,
                            optimizer_params={
                                'learning_rate': opts.base_lr,
                                'wd': opts.wd
                            })

    # define data iterators
    train_iter = io.NDArrayIter({'data': im_train, 'mask': mask_train},
                                {'label': c_labels_train},
                                batch_size=opts.train_batch_size,
                                shuffle=True)
    val_iter = io.NDArrayIter({'data': im_val, 'mask': mask_val},
                              {'label': c_labels_val},
                              batch_size=opts.val_batch_size)

    # losses
    losses = {}
    for l in opts.losses:
        losses[l] = CC.losses[l]()

    # validation data to GPU
    im_val = nd.array(im_val, ctx=ctx)