def set_iter(self):
    """Build the training and validation iterators from the index splits.

    Training batches are reshuffled each epoch; validation order is fixed.
    """
    train_images = self.im[self.train_idx]
    train_labels = self.lab[self.train_idx]
    self.train_iter = io.NDArrayIter(
        train_images,
        train_labels,
        batch_size=self.train_batch_size,
        shuffle=True)
    val_images = self.im[self.val_idx]
    val_labels = self.lab[self.val_idx]
    self.val_iter = io.NDArrayIter(
        val_images,
        val_labels,
        batch_size=self.val_batch_size)
def set_iter(opts):
    """Create train/validation data iterators over plain index ranges.

    The data stream is just sample indices with all-zero dummy labels;
    presumably the actual samples are looked up elsewhere by index.
    """
    train_iter = io.NDArrayIter(
        data=np.arange(opts.train_amount),
        label=np.zeros((opts.train_amount, )),
        batch_size=opts.batch_size,
        shuffle=True)
    val_iter = io.NDArrayIter(
        data=np.arange(opts.val_amount),
        label=np.zeros((opts.val_amount, )),
        batch_size=opts.batch_size)
    return train_iter, val_iter
def fit(self, train_set, batch_size=10, num_epochs=10, gibbs_sampling_steps=1, learning_rate=0.01):
    """
    Fit the model to the training data.

    :param train_set: 2-D NDArray of samples; its second dimension must
        match the number of rows of ``self.weights``
    :param batch_size: samples per minibatch
    :param num_epochs: number of full passes over the training set
    :param gibbs_sampling_steps: Gibbs steps used per batch update
    :param learning_rate: step size for the parameter updates
    """
    assert isinstance(train_set, nd.NDArray)
    assert len(train_set.shape) == 2
    assert train_set.shape[1] == self.weights.shape[0]
    # Insert a singleton middle axis: (N, D) -> (N, 1, D).
    num_samples, num_features = train_set.shape
    train_set = train_set.reshape((num_samples, 1, num_features))
    for _epoch in range(num_epochs):
        # Fresh iterator each epoch: reshuffles the set and drops any
        # incomplete trailing batch.
        epoch_iter = io.NDArrayIter(
            data=train_set,
            shuffle=True,
            batch_size=batch_size,
            last_batch_handle='discard')
        for batch in epoch_iter:
            self._train_batch(batch.data[0], gibbs_sampling_steps, learning_rate)
def __init__(self, X, Y, batch_size=1, shuffle=True, ctx=mx.cpu()):
    # Wrap (X, Y) in an NDArrayIter. np.transpose(X, [0, 3, 1, 2]) moves
    # the last axis to position 1 (NHWC -> NCHW, assuming X holds
    # channels-last images — TODO confirm with callers).
    # NOTE(review): gluon.utils.split_and_load returns a *list* of
    # per-context arrays; passing that list straight to NDArrayIter looks
    # suspect unless ctx is effectively a single context — verify.
    self.data_iter = io.NDArrayIter(
        data=gluon.utils.split_and_load(np.transpose(X, [0, 3, 1, 2]), ctx_list=ctx, batch_axis=0),
        label=gluon.utils.split_and_load(Y, ctx_list=ctx, batch_axis=0),
        batch_size=batch_size,
        shuffle=shuffle)
    # Total number of samples, kept for length queries.
    self.len = len(X)
def SiftSmallIter(dataPath, trainNum, valNum, batchSize):
    """Build train/validation iterators over the siftsmall learn vectors.

    Vectors are scaled by 0.01 and split by a random permutation into at
    most 20000 training and 5000 validation samples; incomplete trailing
    batches are dropped.
    """
    vectors = ReadFvecs(dataPath, "siftsmall_learn.fvecs")
    vectors = vectors.astype(npy.float32) * 0.01
    total = vectors.shape[0]
    numTrain = npy.minimum(trainNum, 20000)
    numVal = npy.minimum(valNum, 5000)
    perm = npy.arange(total)
    npy.random.shuffle(perm)
    trainIter = mxio.NDArrayIter(
        data=vectors[perm[:numTrain], :],
        batch_size=batchSize,
        shuffle=True,
        last_batch_handle="discard")
    valIter = mxio.NDArrayIter(
        data=vectors[perm[numTrain:numTrain + numVal], :],
        batch_size=batchSize,
        shuffle=False,
        last_batch_handle="discard")
    return (trainIter, valIter)
def get_batch_data(data: DataFrame, cols: dict, workers, batch_size, style: str, is_test=False, shuffle=True):
    """Return a minibatch source over *data*.

    For ``style == "imperative"`` a gluon DataLoader is returned; for
    ``style == "symbolic"`` an NDArrayIter. Test-mode sources carry no
    labels; any other style yields None.
    """
    feature_cols = (cols["continuous"]
                    + cols["categorical"]["numeric"]
                    + cols["categorical"]["string"])
    features = data[feature_cols].values.astype(float32)
    if style == "imperative":
        if is_test:
            dataset = gluon.data.ArrayDataset(features)
        else:
            labels = data[cols["target"]].values.astype(float32)
            dataset = gluon.data.ArrayDataset(features, labels)
        return gluon.data.DataLoader(dataset=dataset,
                                     batch_size=batch_size,
                                     shuffle=shuffle,
                                     num_workers=workers,
                                     thread_pool=True)
    elif style == "symbolic":
        if is_test:
            return io.NDArrayIter(data=features,
                                  batch_size=batch_size,
                                  shuffle=shuffle,
                                  data_name="data")
        labels = data[cols["target"]].values.astype(float32)
        # roll_over keeps leftover samples for the next epoch's first batch.
        return io.NDArrayIter(data=features,
                              label=labels,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              data_name="data",
                              label_name="target",
                              last_batch_handle="roll_over")
def _init_iter(self, X, y, is_train):
    """Initialize the iterator given input.

    Arrays are wrapped in an NDArrayIter (with validated, flattened
    labels); an existing DataIter is passed through unchanged.
    """
    if isinstance(X, (np.ndarray, nd.NDArray)):
        if y is None:
            if is_train:
                raise ValueError('y must be specified when X is numpy.ndarray')
            # Prediction path: labels are unused, so synthesize zeros.
            y = np.zeros(X.shape[0])
        if not isinstance(y, (np.ndarray, nd.NDArray)):
            raise TypeError('y must be ndarray when X is numpy.ndarray')
        if X.shape[0] != y.shape[0]:
            raise ValueError("The numbers of data points and labels not equal")
        # Accept (N, 1) labels by flattening them to 1-D.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.flatten()
        if y.ndim != 1:
            raise ValueError("Label must be 1D or 2D (with 2nd dimension being 1)")
        effective_batch = min(X.shape[0], self.numpy_batch_size)
        if is_train:
            return io.NDArrayIter(X, y, effective_batch,
                                  shuffle=is_train, last_batch_handle='roll_over')
        return io.NDArrayIter(X, y, effective_batch, shuffle=False)
    if not isinstance(X, io.DataIter):
        raise TypeError('X must be DataIter, NDArray or numpy.ndarray')
    return X
def dataloader(self, data, batch_size, shuffle=True):
    '''
    Constructs a data loader for generating minibatches of data.

    Args
    ----
    data: numpy array (or MXNet NDArray), no default
        The data from which to load minibatches. May be None.
    batch_size: integer, no default
        The # of samples returned in each minibatch.
    shuffle: boolean, default True
        Whether or not to shuffle the data prior to returning the data loader.

    Returns
    -------
    A gluon DataLoader for numpy input, an NDArrayIter otherwise, or
    None when data is None. Incomplete final batches are discarded.
    '''
    if data is None:
        return None
    # isinstance is the idiomatic type check (was: type(data) is np.ndarray);
    # the stale commented-out manual-shuffling code has been removed.
    if isinstance(data, np.ndarray):
        return gluon.data.DataLoader(data, batch_size,
                                     last_batch='discard', shuffle=shuffle)
    return io.NDArrayIter(data={'data': data}, batch_size=batch_size,
                          shuffle=shuffle, last_batch_handle='discard')
def save_latent(self, saveto):
    """Encode the train/valid/test splits and save the latent codes.

    Runs ``self.Enc`` over every split and writes
    ``<domain>{train,val,test}_latent.npy`` files under *saveto*. Returns
    early (best-effort, with a message) if any split cannot be loaded.

    :param saveto: directory the .npy files are written to
    """
    before_softmax = True  # save pre-softmax encoder outputs

    def _make_loader(split):
        """One non-shuffled, discard-remainder loader for a data split."""
        split_data = self.data.data[split]
        if type(split_data) is np.ndarray:
            dataset = gluon.data.dataset.ArrayDataset(split_data)
            return gluon.data.DataLoader(dataset, self.args['batch_size'],
                                         shuffle=False, last_batch='discard')
        return io.NDArrayIter(data={'data': split_data},
                              batch_size=self.args['batch_size'],
                              shuffle=False, last_batch_handle='discard')

    try:
        train_data = _make_loader('train')
        val_data = _make_loader('valid')
        test_data = _make_loader('test')
    # Was a bare `except:`; keep the best-effort early return but catch
    # only Exception so KeyboardInterrupt/SystemExit still propagate.
    except Exception:
        print("Loading error during save_latent. "
              "Probably caused by not having validation or test set!")
        return

    def _encode(loader, num_rows):
        """Encode every batch from *loader*; return a (rows, ndim_y)
        array truncated to the rows actually processed (trailing
        incomplete batches were discarded by the loader)."""
        bs = self.args['batch_size']
        out = np.zeros((num_rows, self.ndim_y))
        last = -1  # stays -1 if the loader yields nothing -> empty result
        for last, batch in enumerate(loader):
            # DataLoader yields NDArrays; NDArrayIter yields DataBatch.
            if type(batch) is io.DataBatch:
                batch = batch.data[0].as_in_context(self.model_ctx)
            else:
                batch = batch.as_in_context(self.model_ctx)
            if before_softmax:
                encoded = self.Enc(batch)
            else:
                encoded = nd.softmax(self.Enc(batch))
            out[last * bs:(last + 1) * bs] = encoded.asnumpy()
        return np.delete(out, np.s_[(last + 1) * bs:], 0)

    np.save(os.path.join(saveto, self.args['domain'] + 'train_latent.npy'),
            _encode(train_data, self.data.data['train'].shape[0]))
    np.save(os.path.join(saveto, self.args['domain'] + 'val_latent.npy'),
            _encode(val_data, self.data.data['valid'].shape[0]))
    np.save(os.path.join(saveto, self.args['domain'] + 'test_latent.npy'),
            _encode(test_data, self.data.data['test'].shape[0]))
def set_iter(self):
    """Build the (non-shuffled) data iterator used for testing."""
    self.test_iter = io.NDArrayIter(
        self.im,
        self.lab,
        batch_size=self.test_batch_size)
else: net.load_params('%snet-%04d' % (dir_out_checkpoints, opts.resumed_epoch)) # define trainer trainer = gluon.Trainer(net.collect_params(), optimizer=opts.optimizer, optimizer_params={ 'learning_rate': opts.base_lr, 'wd': opts.wd }) # define data iteration train_iter = io.NDArrayIter({ 'data': im_train, 'mask': mask_train }, {'label': c_labels_train}, batch_size=opts.train_batch_size, shuffle=True) val_iter = io.NDArrayIter({ 'data': im_val, 'mask': mask_val }, {'label': c_labels_val}, batch_size=opts.val_batch_size) # losses losses = {} for l in opts.losses: losses[l] = CC.losses[l]() # validation data to GPU im_val = nd.array(im_val, ctx=ctx)