@classmethod
def setUpClass(cls):
    print("Loading data...")
    # Load the MNIST training set.
    traindata = load_mnist(
        'data/mnist/train-images.idx3-ubyte',
        'data/mnist/train-labels.idx1-ubyte'
    )
    # Shuffle, then carve out a validation set.
    validation_size = 5000
    perm = np.random.permutation(len(traindata))
    traindata.shuffle(perm)
    trainlabels = traindata.get_labels()
    validlabels = trainlabels[:validation_size]
    trainlabels = trainlabels[validation_size:]
    validsamples = []
    trainsamples = []
    for i, sample in enumerate(traindata):
        if i < validation_size:
            validsamples.append(sample)
        else:
            trainsamples.append(sample)
    cls.traindata = IdentityDataset(trainsamples, trainlabels)
    cls.validdata = IdentityDataset(validsamples, validlabels)
    cls.testdata = load_mnist(
        'data/mnist/t10k-images.idx3-ubyte',
        'data/mnist/t10k-labels.idx1-ubyte'
    )
def test_load_test_mnist():
    mnist = load_mnist()
    assert mnist['num_inputs'] == 28 * 28
    assert mnist['num_outputs'] == 10
    # The test partition occupies indices 60000..69999.
    assert mnist['partitions']['test'][0] == 60000
    assert max(mnist['partitions']['test']) == 69999
    print('train len', len(mnist['partitions']['training']))
    # The first four MNIST training labels are 5, 0, 4, 1.
    assert mnist['data'][0]['output'].A[0].tolist().index(1) == 5
    assert mnist['data'][1]['output'].A[0].tolist().index(1) == 0
    assert mnist['data'][2]['output'].A[0].tolist().index(1) == 4
    assert mnist['data'][3]['output'].A[0].tolist().index(1) == 1
def load_data(mode):
    (x_train, y_train), (x_test, y_test) = load_mnist(mode)
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)
    # Flatten each 28x28 image into a 784-dimensional vector.
    x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
    x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    # Trim both sets to a whole number of batches; floor division keeps
    # the cut an integer under Python 3.
    batch_size = params.batch_size
    cut = batch_size * (x_train.shape[0] // batch_size)
    x_train = x_train[:cut]
    y_train = y_train[:cut]
    cut = batch_size * (x_test.shape[0] // batch_size)
    x_test = x_test[:cut]
    y_test = y_test[:cut]
    return (x_train, y_train), (x_test, y_test)
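# Worked example of the cut: with 60000 training samples and
# batch_size = 128 (an illustrative value, not from the original),
# cut = 128 * (60000 // 128) = 59904, so the last 96 samples are dropped.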
def get_classifier(self) -> CharacterClassifier:
    if self.__classifier:
        return self.__classifier
    self.__classifier = CharacterClassifier(self.__lbls)
    if os.path.isfile(self.__db_path):
        logger.info('Loading database...')
        with open(self.__db_path, 'rb') as file:
            self.__classifier.matricesPoids = pickle.load(file)
        logger.info('Done.')
    else:
        start_time = time.process_time()
        logger.info('Creating database')
        logger.info('Loading dataset...')
        if self.__mode:
            ds = dataset.load_ttf('dataset/OpenSans-Bold.ttf', self.__lbls)
        else:
            ds = dataset.load_mnist(
                'dataset/train_img_mnist.dat',
                'dataset/lbl_mnist.dat',
                dict((i + 1, self.__lbls[i]) for i in range(len(self.__lbls))))
        ds = dataset.list_to_dict(ds)
        self.__classifier.donnee_ent = ds
        logger.info('Done. (' + str(time.process_time() - start_time)
                    + ' seconds)')
        self.__classifier.train_liste()
        with open(self.__db_path, 'wb') as file:
            pickle.dump(self.__classifier.matricesPoids, file)
        logger.info('Database created in '
                    + str(time.process_time() - start_time) + ' seconds.')
    return self.__classifier
def main(args):
    s_train, s_test = dataset.load_svhn()
    t_train, t_test = dataset.load_mnist()
    s_train_iter = SerialIterator(
        s_train, args.batchsize, shuffle=True, repeat=True)
    # Draw target-domain training batches from the train split (t_train).
    t_train_iter = SerialIterator(
        t_train, args.batchsize, shuffle=True, repeat=True)
    s_test_iter = SerialIterator(
        s_test, args.batchsize, shuffle=False, repeat=False)
    t_test_iter = SerialIterator(
        t_test, args.batchsize, shuffle=False, repeat=False)

    model = drcn.DRCN()
    target_model = LossAndAccuracy(model)
    loss_list = ['loss_cla_s', 'loss_cla_t', 'loss_rec']

    optimizer = chainer.optimizers.RMSprop(args.lr)
    optimizer.setup(model)
    optimizers = {'model': optimizer}

    updater = Updater(s_train_iter, t_train_iter, optimizers, args)
    out_dir = utils.prepare_dir(args)
    trainer = Trainer(updater, (args.max_iter, 'iteration'), out=out_dir)
    trainer.extend(extensions.LogReport(trigger=(args.interval, args.unit)))
    trainer.extend(
        extensions.snapshot_object(model, filename='model'),
        trigger=MaxValueTrigger('acc_t', (args.interval, args.unit)))
    trainer.extend(
        extensions.Evaluator(t_test_iter, target_model, device=args.device),
        trigger=(args.interval, args.unit))
    trainer.extend(extensions.PrintReport(
        [args.unit, *loss_list, 'acc_s', 'acc_t', 'elapsed_time']))
    trainer.extend(extensions.PlotReport(
        [*loss_list], x_key=args.unit, file_name='loss.png',
        trigger=(args.interval, args.unit)))
    trainer.extend(extensions.PlotReport(
        ['acc_s', 'acc_t'], x_key=args.unit, file_name='accuracy.png',
        trigger=(args.interval, args.unit)))
    trainer.extend(extensions.ProgressBar(update_interval=1))
    trainer.run()
import dataset
import numpy as np
import matplotlib.pyplot as plt

# Load data.
X, y = dataset.load_mnist()
size = y.size
X_n = np.zeros([size, 784])
y_n = np.zeros([size, 1])

# Split into train and test sets.
train_ratio = 0.4

# Train set. (A duplicate-free alternative split is sketched below.)
train_num = int(train_ratio * size)
rand = np.random.random([train_num]) * size
train_ind = rand.astype(int)
X_tr = X[train_ind, :]
y_tr = y[train_ind]

# Test set: everything not drawn for training.
total = range(size)
test_ind = np.array(list(set(total) - set(train_ind)))
X_te = X[test_ind, :]
y_te = y[test_ind]

print("train size", X_tr.shape, y_tr.shape)
print("test size", X_te.shape, y_te.shape)

# --- Verify dataset ---
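# Note: np.random.random samples indices with replacement, so train_ind can
# contain duplicates and the train set may hold fewer than train_num unique
# rows. A duplicate-free alternative sketch, reusing X, y, size and
# train_num from above:
perm = np.random.permutation(size)
train_ind = perm[:train_num]
test_ind = perm[train_num:]
X_tr, y_tr = X[train_ind, :], y[train_ind]
X_te, y_te = X[test_ind, :], y[test_ind]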
def get_data():
    # (train images, train labels), (test images, test labels)
    (x_train, t_train), (x_test, t_test) = \
        load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_train, t_train, x_test, t_test
# Mini-batches
# Use a small subset of the data as an approximation of the whole.
# Neural network training likewise selects only part of the training data
# at each step; that part is called a mini-batch, and learning from
# randomly drawn mini-batches is called mini-batch learning.
import sys, os
import numpy as np
from dataset import load_mnist

(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

print(x_train.shape)  # Check the shape of x_train.
print(t_train.shape)

train_size = x_train.shape[0]
batch_size = 10

# Draw batch_size indices from the integers below train_size.
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
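# Where the mini-batch leads: the loss is averaged over the batch. A minimal
# sketch of a batch-aware cross-entropy, assuming the one-hot labels loaded
# above; this helper is written here for illustration and is not part of
# dataset.py.
def cross_entropy_error(y, t):
    # y: network output, shape (batch_size, 10); t: one-hot labels, same shape.
    if y.ndim == 1:  # promote a single sample to a batch of one
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)
    batch_size = y.shape[0]
    delta = 1e-7  # avoid log(0)
    return -np.sum(t * np.log(y + delta)) / batch_size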
from engine import Engine
from dataset import load_mnist
import argparse
import sys

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true', help='Run training')
    parser.add_argument('--test', action='store_true', help='Run testing')
    args = parser.parse_args(sys.argv[1:])

    datasets = load_mnist()
    engine = Engine(datasets)
    if args.train:
        engine.init_engine()
        engine.train(restore_checkpoint=True)
    elif args.test:
        engine.init_engine(is_training=False)
        engine.test()
    ax2.set_ylabel('accuracy', fontsize=23)
    ax.set_xlabel('epoch', fontsize=23)
    # Combine the line handles from both axes into a single legend.
    Ls = L1 + L2 + L3 + L4
    labs = [l.get_label() for l in Ls]
    leg = plt.legend(Ls, labs, loc='lower left', fontsize=18, frameon=True)
    leg.get_frame().set_edgecolor('k')
    leg.get_frame().set_linewidth(2)
    fname = 'epoch_{0}.png'.format(n_epochs)
    fname = os.path.join(dirpath, fname)
    fig.savefig(fname, dpi=fig.dpi)
    plt.close(fig)


def plot_rbm_filters(W):
    plt.figure(figsize=(12, 12))
    for i in range(64):
        filt = W[:, i].reshape((28, 28))
        plt.subplot(8, 8, i + 1)
        plt.imshow(filt, cmap=plt.cm.gray_r, interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
    plt.suptitle('First 64 components extracted by RBM', fontsize=24)


if __name__ == '__main__':
    from dataset import load_mnist
    X, y = load_mnist(mode='train', path='../../data/')
    plot_greyscale_image(X[0] / 255., title='Label is {0}'.format(y[0]))
    plt.show()
def get_data():
    (x_train, t_train), (x_test, t_test) = \
        load_mnist(normalize=True, flatten=True, one_hot_label=False)
    # Only the test split is needed here.
    return x_test, t_test
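# A minimal sketch of how get_data() is typically consumed for inference.
# init_network() and predict() are assumed helpers (e.g. loading pretrained
# weights and running a forward pass); they are not defined in the snippet
# above.
import numpy as np

x, t = get_data()
network = init_network()        # assumed: returns pretrained parameters
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])  # assumed: class scores for one image
    p = np.argmax(y)            # index of the most probable class
    if p == t[i]:
        accuracy_cnt += 1
print("Accuracy:", float(accuracy_cnt) / len(x))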
def run(shape, learning_rate=.1, corruption_rate=0, decay_rate=0,
        hidden_epochs=100, seed=0, init_with='random'):
    random.seed(seed)
    numpy.random.seed(seed)
    start_time = time.time()
    print('Running mnist with:')
    print('\tshape:', shape)
    print('\tlearning_rate:', learning_rate)
    print('\tcorruption_rate:', corruption_rate)
    print('\tdecay_rate:', decay_rate)
    print('\thidden_epochs:', hidden_epochs)
    print('\tseed:', seed)
    print('\tinit_with:', init_with)

    print('load dataset')
    mnist = load_mnist()
    data_loaded_at_time = time.time()

    if init_with == 'sac':
        print('initialize with sac')
        initial_net = stacked_auto_encoder(
            mnist,
            shape,
            learning_rate,
            hidden_epochs=hidden_epochs,
            corruption_rate=corruption_rate,
            decay_rate=decay_rate,
        )
    else:
        initial_net = random_weights(mnist, shape)
    print('Initial net %: ', evaluate_net(mnist, initial_net, 'test') * 100)
    initialization_done_time = time.time()

    # Now train it on the actual data.
    refined_net = train(
        mnist,
        initial_net,
        learning_rate,
        # This is the max number of epochs to allow. With patience I've
        # never come close to hitting this.
        1000,
    )
    print('After refinement %: ', evaluate_net(mnist, refined_net, 'test') * 100)

    end_time = time.time()
    print('start time:', start_time)
    print('data done loading:', data_loaded_at_time)
    print('init done at:', initialization_done_time)
    print('end time:', end_time)
    print('total elapsed time:', end_time - start_time)
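# Hypothetical invocation of run(), for illustration only; the shape and
# hyperparameter values below are assumptions, not taken from the original:
#
#   run(shape=[784, 128, 10], learning_rate=0.1, corruption_rate=0.3,
#       hidden_epochs=50, seed=42, init_with='sac')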
        pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val
        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]


# The actual training run.
# Read in the data.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

max_epochs = 20

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={
                            'filter_num': 30,
                            'filter_size': 5,
                            'pad': 0,
                            'stride': 1
                        },
                        hidden_size=100,
                        output_size=10,
                        weight_init_std=0.01)
trainer = Trainer(network,
          y_train,
          batch_size=128,
          epochs=50,
          verbose=2,
          validation_data=(x_test, y_test))
acc = 100 * model.evaluate(x_test, y_test, verbose=0)[1]
print('Test error:', 100 - acc)
if pool_type != 'max':
    acc1 = 100 * model.evaluate(
        resize_images(x_test, scale=0.8), y_test, verbose=0)[1]
    print("Test error (scale=0.8): ", 100 - acc1)


if __name__ == '__main__':
    x_train, y_train, x_test, y_test = load_mnist()

    # Baseline model
    evaluate_model('max', n_filters=64)
    evaluate_model('max', n_filters=32)
    evaluate_model('max', n_filters=4)

    # Global Max Pooling
    evaluate_model('gmp', n_filters=16)
    evaluate_model('gmp', n_filters=24)
    evaluate_model('gmp', n_filters=64)
    evaluate_model('gmp', n_filters=128)

    # SPP
    evaluate_model('spp', n_filters=8)
    evaluate_model('spp', n_filters=16)
# coding: utf-8
import sys, os
import numpy as np
from dataset import load_mnist
from PIL import Image


def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()


# (train images, train labels), (test images, test labels)
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)

img = x_train[0]
label = t_train[0]
print(label)  # 5

print(img.shape)           # (784,)
img = img.reshape(28, 28)  # Reshape back to the original image size.
print(img.shape)           # (28, 28)

img_show(img)
from tqdm import tqdm
import numpy

import dataset
from nets import simpleNet

mnist = dataset.load_mnist()
nn = simpleNet(architecture=numpy.array([784, 100, 10]))

for epoch in range(10):
    success = numpy.zeros(shape=(mnist[0][0].shape[0],))
    for example in tqdm(range(mnist[0][0].shape[0])):
        input = mnist[0][0][example, :]
        target = mnist[0][1][example]
        output = nn.forward(input)
        # Reward-driven update: small penalty by default, small reward
        # (and a success tick) when the prediction matches the target.
        reward = -0.00001
        if output == target:
            reward = 0.0001
            success[example] += 1
        nn.backward(reward)
    print(numpy.sum(success) / success.shape[0])
import os
import sys
sys.path.append(os.pardir)  # Make files in the parent directory importable.
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from dataset import load_mnist
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.trainer import Trainer

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

# Reduce the training data to reproduce overfitting.
x_train = x_train[:300]
t_train = t_train[:300]

# Whether to use Dropout, and at what ratio ========================
use_dropout = True  # False to disable Dropout
dropout_ratio = 0.2
# ==================================================================

# Note: 'dropout_ration' is the keyword spelling MultiLayerNetExtend expects.
network = MultiLayerNetExtend(input_size=784,
                              hidden_size_list=[100, 100, 100, 100, 100, 100],
                              output_size=10,
                              use_dropout=use_dropout,
                              dropout_ration=dropout_ratio)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=301, mini_batch_size=100,
                  optimizer='sgd', optimizer_param={'lr': 0.01},
                  verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
if dataset == "imagenet":
    data_iter = ds.load_imagenet_rec(batch_size, shp[2])

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]
elif dataset == "voc":
    val_data = ds.load_voc(batch_size, shp[2])
    data_iter = iter(val_data)

    def data_iter_func():
        return next(data_iter)
elif dataset == "trec":
    data_iter = ds.load_trec(batch_size)

    def data_iter_func():
        return next(data_iter)
elif dataset == "mnist":
    val_loader = ds.load_mnist(batch_size)
    data_iter = iter(val_loader)

    def data_iter_func():
        return next(data_iter)
elif dataset == "quickdraw":
    val_data = ds.load_quickdraw10(batch_size)
    data_iter = iter(val_data)

    def data_iter_func():
        return next(data_iter)
else:
    assert False, "dataset:%s is not supported" % dataset

inputs = [mx.sym.var("data")]
sym, params = mx.sym.load(sym_file), nd.load(prm_file)
sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)