def train_single_model(model, dataset, args, name="teacher"):
    """Train ``model``, print test accuracy after every epoch, then save it.

    Args:
        model: object providing ``forward``, ``backward``, ``apply_grads``,
            ``set_phase`` and ``save`` (all are called below).
        dataset: sequence unpacked as ``(train_x, train_y, test_x, test_y)``.
        args: namespace read for ``batch_size``, ``num_ep`` and ``model_dir``.
        name: label used in the start-up message and as the stem of the
            saved file, ``<model_dir>/<name>.model``.
    """
    print("training %s model" % name)
    train_x, train_y, test_x, test_y = dataset

    batches = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        epoch_start = time.time()

        for step, batch in enumerate(batches(train_x, train_y)):
            pred = model.forward(batch.inputs)
            loss, grads = model.backward(pred, batch.targets)
            model.apply_grads(grads)

            # Per-batch log: the accuracy result extended with the batch
            # index and the loss of this step.
            log = accuracy(np.argmax(pred, 1), np.argmax(batch.targets, 1))
            log.update({"batch": step, "loss": loss})
            print(log)

        elapsed = time.time() - epoch_start
        print("Epoch %d time cost: %.4f" % (epoch, elapsed))

        # evaluate: switch to TEST phase, accumulate hits over the test
        # batches, then switch back to TRAIN for the next epoch.
        model.set_phase("TEST")
        hit = 0
        total = 0
        for batch in batches(test_x, test_y):
            pred = model.forward(batch.inputs)
            res = accuracy(np.argmax(pred, 1), np.argmax(batch.targets, 1))
            hit += res["hit_num"]
            total += res["total_num"]
        print("accuracy: %.4f" % (1.0 * hit / total))
        model.set_phase("TRAIN")

    # save model
    if not os.path.isdir(args.model_dir):
        os.makedirs(args.model_dir)
    model_path = os.path.join(args.model_dir, name + ".model")
    model.save(model_path)
    print("model saved in %s" % model_path)
def train_distill_model(dataset, args):
    """Train a student model against a teacher's softened outputs and save it.

    A teacher checkpoint is expected at ``<model_dir>/teacher.model``; when
    that file is missing a teacher is trained first via
    ``train_single_model``. The student is then trained for ``args.num_ep``
    epochs, evaluated on the test split after each epoch, and written to
    ``<model_dir>/distill-<T>.model``.

    Args:
        dataset: sequence unpacked as ``(train_x, train_y, test_x, test_y)``.
        args: namespace read for ``lr``, ``model_dir``, ``alpha``, ``T``,
            ``batch_size`` and ``num_ep``.
    """
    # load dataset
    train_x, train_y, test_x, test_y = dataset

    # load or train a teacher model
    teacher = Model(
        net=teacher_net,
        loss=SoftmaxCrossEntropy(),
        optimizer=Adam(lr=args.lr),
    )
    teacher_model_path = os.path.join(args.model_dir, "teacher.model")
    if not os.path.isfile(teacher_model_path):
        print("No teacher model founded. Training a new one...")
        train_single_model(teacher, dataset, args, name="teacher")
    # The checkpoint is (re)loaded in both cases, then the teacher is put in
    # TEST phase for the rest of this function.
    teacher.load(teacher_model_path)
    teacher.set_phase("TEST")

    print("training distill model")
    # define a student model
    distill_loss = DistillationLoss(alpha=args.alpha, T=args.T)
    student = Model(
        net=student_net,
        loss=distill_loss,
        optimizer=Adam(lr=args.lr),
    )

    # run training
    batches = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        epoch_start = time.time()

        for batch in batches(train_x, train_y):
            pred = student.forward(batch.inputs)
            teacher_out = teacher.forward(batch.inputs)
            teacher_out_prob = softmax(teacher_out, t=args.T)

            # The loss value is computed but not used further in this
            # function; the gradient drives the update.
            loss = student.loss.loss(pred, batch.targets, teacher_out_prob)
            grad_from_loss = student.loss.grad(
                pred, batch.targets, teacher_out_prob)
            grads = student.net.backward(grad_from_loss)
            student.apply_grads(grads)

        elapsed = time.time() - epoch_start
        print("Epoch %d time cost: %.4f" % (epoch, elapsed))

        # evaluate
        student.set_phase("TEST")
        hit = 0
        total = 0
        for batch in batches(test_x, test_y):
            pred = student.forward(batch.inputs)
            res = accuracy(np.argmax(pred, 1), np.argmax(batch.targets, 1))
            hit += res["hit_num"]
            total += res["total_num"]
        print("accuracy: %.4f" % (1.0 * hit / total))
        student.set_phase("TRAIN")

    # save the distilled model
    if not os.path.isdir(args.model_dir):
        os.makedirs(args.model_dir)
    model_path = os.path.join(args.model_dir, "distill-%d.model" % args.T)
    student.save(model_path)
    print("model saved in %s" % model_path)
def _train_one_epoch(self):
    """Run one pass over the training split and return the parameter change.

    Returns:
        ``decode(encode(after) - encode(before))`` where ``before`` and
        ``after`` are the results of ``self.get_params()`` taken around the
        training pass.
    """
    # NOTE(review): the returned difference is only meaningful if
    # get_params() hands back a snapshot rather than live references -
    # confirm.
    params_before = self.get_params()

    # NOTE(review): `batch_size` is neither a local nor an attribute of
    # `self`; it has to resolve from an enclosing/module scope - confirm it
    # is defined there.
    batches = BatchIterator(batch_size=batch_size)
    train_x = self.dataset["train_x"]
    train_y = self.dataset["train_y"]
    for batch in batches(train_x, train_y):
        pred = self.nn_model.forward(batch.inputs)
        _, grads = self.nn_model.backward(pred, batch.targets)
        # NOTE(review): spelled `apply_grad` here; verify this matches the
        # method name on the model class used for `self.nn_model`.
        self.nn_model.apply_grad(grads)

    params_after = self.get_params()
    return decode(encode(params_after) - encode(params_before))
def test_batch_iterator():
    """Check that BatchIterator yields ten batches of the requested size."""
    batch_size = 10
    expected_batches = 10
    n_data = expected_batches * batch_size
    iterator = BatchIterator(batch_size=batch_size)

    x_dim, y_dim = 10, 5
    fake_x = np.random.randint(0, 100, size=(n_data, x_dim))
    fake_y = np.random.randint(0, 100, size=(n_data, y_dim))

    seen = 0
    for batch_x, batch_y in iterator(fake_x, fake_y):
        # Every batch must be full-sized and keep the feature dimension.
        assert batch_x.shape == (batch_size, x_dim)
        assert batch_y.shape == (batch_size, y_dim)
        seen += 1

    assert seen == expected_batches
def main(args):
    """Fit an MLP to the data derived from ``args.img`` and save renderings.

    After every epoch the network is run on the full training input and the
    mean squared error against ``train_y`` is printed. Every fifth epoch
    (starting with epoch 0) the predictions are reshaped to the image shape,
    scaled to ``uint8`` and written to
    ``<output_dir>/<image stem>-paint-epoch<N><ext>``.

    Args:
        args: namespace read for ``seed``, ``img``, ``batch_size``,
            ``num_ep`` and ``output_dir``.
    """
    if args.seed >= 0:
        random_seed(args.seed)

    # data preparing
    train_x, train_y, img_shape = prepare_dataset(args.img)

    net = Net([
        Dense(30),
        ReLU(),
        Dense(100),
        ReLU(),
        Dense(100),
        ReLU(),
        Dense(30),
        ReLU(),
        Dense(3),
        Sigmoid(),
    ])

    model = Model(net=net, loss=MSE(), optimizer=Adam())

    # The output name only depends on the input path, so split it once
    # instead of on every painted epoch.
    name, ext = os.path.splitext(args.img)
    filename = os.path.basename(name)

    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        for batch in iterator(train_x, train_y):
            preds = model.forward(batch.inputs)
            _, grads = model.backward(preds, batch.targets)
            model.apply_grads(grads)

        # evaluate
        preds = net.forward(train_x)
        mse = mean_square_error(preds, train_y)
        print("Epoch %d %s" % (epoch, mse))

        # generate painting
        if epoch % 5 == 0:
            preds = preds.reshape(img_shape[0], img_shape[1], -1)
            preds = (preds * 255.0).astype("uint8")
            out_filename = filename + "-paint-epoch" + str(epoch) + ext
            # exist_ok avoids the check-then-create race of
            # `if not exists: makedirs`.
            os.makedirs(args.output_dir, exist_ok=True)
            out_path = os.path.join(args.output_dir, out_filename)
            Image.fromarray(preds).save(out_path)
            print("save painting to %s" % out_path)
def __init__(self, model, train_set):
    """Keep the model and training data and prepare a batch iterator.

    The batch size comes from ``args.batch_size``; ``args`` is not a
    parameter of this method and must resolve from an enclosing scope.

    Args:
        model: stored as ``self.model``.
        train_set: stored as ``self.train_set``.
    """
    self.model = model
    self.train_set = train_set
    # No batch generator exists yet; the attribute starts out as None.
    self.batch_gen = None
    self.iterator = BatchIterator(batch_size=args.batch_size)
def train(args):
    """Train a generator/discriminator pair on MNIST and save the generator.

    The train, validation and test splits are concatenated into one training
    pool. After each epoch a batch of samples generated from a fixed noise
    tensor is written to ``<output_dir>/ep<N>.png``; after the last epoch the
    generator is saved to ``<output_dir>/<model_name>``.

    Args:
        args: namespace read for ``data_dir``, ``model_type`` (``"cnn"`` or
            ``"mlp"``), ``batch_size``, ``nz``, ``lr_g``, ``lr_d``, ``beta1``,
            ``num_ep``, ``output_dir`` and ``model_name``.

    Raises:
        ValueError: if ``args.model_type`` is neither ``"cnn"`` nor ``"mlp"``.
    """
    # prepare dataset
    train_, valid, test = mnist(args.data_dir)
    X = np.concatenate([train_[0], valid[0], test[0]])
    # Labels are handed to the iterator alongside X; batch.targets is not
    # read anywhere in the loop below.
    y = np.concatenate([train_[1], valid[1], test[1]])

    if args.model_type == "cnn":
        X = X.reshape((-1, 28, 28, 1))
        G_net, D_net = G_cnn(), D_cnn()
    elif args.model_type == "mlp":
        G_net, D_net = G_mlp(), D_mlp()
    else:
        raise ValueError("Invalid argument: model_type")

    # Create the output directory once, up front. Previously it was only
    # created inside the epoch loop, so with num_ep == 0 the final G.save()
    # could target a directory that does not exist.
    os.makedirs(args.output_dir, exist_ok=True)

    # Fixed noise so the per-epoch sample images are comparable.
    fix_noise = get_noise(size=(args.batch_size, args.nz))
    loss = SigmoidCrossEntropy()
    G = Model(net=G_net, loss=loss,
              optimizer=Adam(args.lr_g, beta1=args.beta1))
    D = Model(net=D_net, loss=loss,
              optimizer=Adam(args.lr_d, beta1=args.beta1))

    running_g_err, running_d_err = 0, 0
    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        for i, batch in enumerate(iterator(X, y)):
            # --- Train Discriminator ---
            # feed with real data (maximize log(D(x)))
            d_pred_real = D.forward(batch.inputs)
            label_real = np.ones_like(d_pred_real)
            d_real_err, d_real_grad = D.backward(d_pred_real, label_real)

            # feed with fake data (maximize log(1 - D(G(z))))
            noise = get_noise(size=(len(batch.inputs), args.nz))
            g_out = G.forward(noise)
            d_pred_fake = D.forward(g_out)
            label_fake = np.zeros_like(d_pred_fake)
            d_fake_err, d_fake_grad = D.backward(d_pred_fake, label_fake)

            # train D
            d_err = d_real_err + d_fake_err
            d_grads = d_real_grad + d_fake_grad
            D.apply_grads(d_grads)

            # ---- Train Generator ---
            # maximize log(D(G(z)))
            d_pred_fake = D.forward(g_out)
            g_err, d_grad = D.backward(d_pred_fake, label_real)
            # Only the gradient w.r.t. D's input is propagated into G.
            g_grads = G.net.backward(d_grad.wrt_input)
            G.apply_grads(g_grads)

            # Exponential moving averages used purely for logging.
            running_d_err = 0.9 * running_d_err + 0.1 * d_err
            running_g_err = 0.9 * running_g_err + 0.1 * g_err
            if i % 100 == 0:
                print("epoch-%d iter-%d d_err: %.4f g_err: %.4f" %
                      (epoch + 1, i + 1, running_d_err, running_g_err))

        # sampling
        print("epoch: %d/%d d_err: %.4f g_err: %.4f" %
              (epoch + 1, args.num_ep, running_d_err, running_g_err))
        samples = G.forward(fix_noise)
        img_name = "ep%d.png" % (epoch + 1)
        save_path = os.path.join(args.output_dir, img_name)
        save_batch_as_images(save_path, samples)

    # save generator
    model_path = os.path.join(args.output_dir, args.model_name)
    G.save(model_path)
    print("Saving generator ", model_path)
def main(args):
    """Train an MNIST classifier, print test accuracy per epoch, and save it.

    ``args.model_type`` selects the network: ``"mlp"``, ``"cnn"`` or
    ``"rnn"``. For the latter two the inputs are reshaped before training.
    The model is saved as
    ``<model_dir>/mnist-<model_type>-epoch<num_ep>.pkl``.

    Args:
        args: namespace read for ``seed``, ``data_dir``, ``model_type``,
            ``lr``, ``batch_size``, ``num_ep`` and ``model_dir``.

    Raises:
        ValueError: if ``args.model_type`` is not one of the three above.
    """
    if args.seed >= 0:
        random_seed(args.seed)

    train_set, _, test_set = mnist(args.data_dir, one_hot=True)
    train_x, train_y = train_set
    test_x, test_y = test_set

    model_type = args.model_type
    if model_type == "mlp":
        # A multilayer perceptron model
        layers = [
            Dense(200),
            ReLU(),
            Dense(100),
            ReLU(),
            Dense(70),
            ReLU(),
            Dense(30),
            ReLU(),
            Dense(10),
        ]
        net = Net(layers)
    elif model_type == "cnn":
        # A LeNet-5 model with activation function changed to ReLU
        image_shape = (-1, 28, 28, 1)
        train_x = train_x.reshape(image_shape)
        test_x = test_x.reshape(image_shape)
        layers = [
            Conv2D(kernel=[5, 5, 1, 6], stride=[1, 1]),
            ReLU(),
            MaxPool2D(pool_size=[2, 2], stride=[2, 2]),
            Conv2D(kernel=[5, 5, 6, 16], stride=[1, 1]),
            ReLU(),
            MaxPool2D(pool_size=[2, 2], stride=[2, 2]),
            Flatten(),
            Dense(120),
            ReLU(),
            Dense(84),
            ReLU(),
            Dense(10),
        ]
        net = Net(layers)
    elif model_type == "rnn":
        # A simple recurrent neural net to classify images.
        sequence_shape = (-1, 28, 28)
        train_x = train_x.reshape(sequence_shape)
        test_x = test_x.reshape(sequence_shape)
        net = Net([RNN(num_hidden=50, activation=Tanh()), Dense(10)])
    else:
        raise ValueError("Invalid argument: model_type")

    model = Model(
        net=net,
        loss=SoftmaxCrossEntropy(),
        optimizer=Adam(lr=args.lr),
    )

    batches = BatchIterator(batch_size=args.batch_size)
    # Per-batch losses are collected here; nothing below reads the list.
    loss_list = list()
    for epoch in range(args.num_ep):
        epoch_start = time.time()
        for batch in batches(train_x, train_y):
            pred = model.forward(batch.inputs)
            loss, grads = model.backward(pred, batch.targets)
            model.apply_grads(grads)
            loss_list.append(loss)
        elapsed = time.time() - epoch_start
        print("Epoch %d time cost: %.4f" % (epoch, elapsed))

        # evaluate: the whole test set goes through in one forward pass
        model.set_phase("TEST")
        test_pred = model.forward(test_x)
        test_pred_idx = np.argmax(test_pred, axis=1)
        test_y_idx = np.argmax(test_y, axis=1)
        print(accuracy(test_pred_idx, test_y_idx))
        model.set_phase("TRAIN")

    # save model
    if not os.path.isdir(args.model_dir):
        os.makedirs(args.model_dir)
    model_name = "mnist-%s-epoch%d.pkl" % (model_type, args.num_ep)
    model_path = os.path.join(args.model_dir, model_name)
    model.save(model_path)
    print("model saved in %s" % model_path)
def main(args):
    """Train a denoising auto-encoder on MNIST, or render latent transitions.

    Two modes, selected by ``args.load_model``:

    * not ``None``: load that checkpoint from ``args.output_dir``, encode a
      fixed set of test images, decode the codes produced by ``transition``
      between consecutive encodings, save the result as
      ``genn-latent.png`` and exit the process.
    * ``None``: train for ``args.num_ep`` epochs on inputs corrupted with
      Gaussian noise, saving the last batch's noisy inputs and
      reconstructions after each epoch, then save the model as
      ``args.save_model``.

    Args:
        args: namespace read for ``seed``, ``output_dir``, ``data_dir``,
            ``lr``, ``load_model``, ``batch_size``, ``num_ep``,
            ``gaussian_mean``, ``gaussian_std`` and ``save_model``.

    Raises:
        SystemExit: after the latent-transition image has been written.
    """
    import sys

    if args.seed >= 0:
        random_seed(args.seed)

    # create output directory for saving result images
    # (exist_ok avoids the check-then-create race)
    os.makedirs(args.output_dir, exist_ok=True)

    # prepare and read dataset
    train_set, _, test_set = mnist(args.data_dir)
    train_x, train_y = train_set
    test_x, test_y = test_set

    # specify the encoder and decoder net structure
    encoder_net = Net([
        Dense(256),
        ReLU(),
        Dense(64),
    ])
    decoder_net = Net([
        ReLU(),
        Dense(256),
        Tanh(),
        Dense(784),
        Tanh(),
    ])
    nets = (encoder_net, decoder_net)
    optimizers = (Adam(args.lr), Adam(args.lr))
    model = AutoEncoder(nets, loss=MSE(), optimizer=optimizers)

    # for pre-trained model, test generated images from latent space
    if args.load_model is not None:
        # load pre-trained model
        model.load(os.path.join(args.output_dir, args.load_model))
        print("Loaded model fom %s" % args.load_model)

        # transition from test[from_idx] to test[to_idx] in n steps
        idx_arr, n = [2, 4, 32, 12, 82], 160
        print("Transition in numbers", [test_y[i] for i in idx_arr],
              "in %d steps ..." % n)
        stops = [model.en_net.forward(test_x[i]) for i in idx_arr]
        k = n // (len(idx_arr) - 1)  # number of code per transition

        # generate all transition codes
        code_arr = []
        for start, stop in zip(stops, stops[1:]):
            code_arr += [c.copy() for c in transition(start, stop, k)]

        # apply decoding all n "code" from latent space...
        # Each decoded output is copied right away (as the original
        # np.array/np.concatenate did) in case forward() reuses its buffer;
        # the pieces are then joined once instead of re-copying the growing
        # batch on every iteration.
        decoded = [np.array(model.de_net.forward(code)) for code in code_arr]
        batch = np.concatenate(decoded) if decoded else None

        output_path = os.path.join(args.output_dir, "genn-latent.png")
        save_batch_as_images(output_path, batch)
        # quit() is a `site` helper meant for interactive sessions;
        # sys.exit() raises the same SystemExit and is always available.
        sys.exit()

    # train the auto-encoder
    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        for batch in iterator(train_x, train_y):
            origin_in = batch.inputs

            # make noisy inputs
            m = origin_in.shape[0]  # batch size
            mu = args.gaussian_mean  # mean
            sigma = args.gaussian_std  # standard deviation
            noises = np.random.normal(mu, sigma, (m, 784))
            noises_in = origin_in + noises  # noisy inputs

            # forward
            genn = model.forward(noises_in)
            # back-propagate against the clean inputs
            loss, grads = model.backward(genn, origin_in)

            # apply gradients
            model.apply_grads(grads)
        # `loss`, `noises_in`, `genn` and `batch` below refer to the last
        # batch of the epoch.
        print("Epoch: %d Loss: %.3f" % (epoch, loss))
        # save all the generated images and original inputs for this batch
        noises_in_path = os.path.join(
            args.output_dir, "ep%d-input.png" % epoch)
        genn_path = os.path.join(
            args.output_dir, "ep%d-genn.png" % epoch)
        save_batch_as_images(noises_in_path, noises_in, titles=batch.targets)
        save_batch_as_images(genn_path, genn, titles=batch.targets)

    # save the model after training
    model.save(os.path.join(args.output_dir, args.save_model))