def train(**params):
    print("** Loading training images")
    start = time.time()
    lr_hr_ds, n_data = data.load_train_dataset(params['lr_dir'], params['hr_dir'],
                                               params['ext'], params['batch_size'])
    val_lr_hr_ds, n_val_data = data.load_test_dataset(params['val_lr_dir'],
                                                      params['val_hr_dir'],
                                                      params['val_ext'],
                                                      params['val_batch_size'])
    print("Finish loading images in %.2fs" % (time.time() - start))

    one_gpu_model, gpu_model = prepare_model(**params)

    exp_folder = make_exp_folder(params['exp_dir'], params['arc'])
    save_params(exp_folder, **params)
    tb_callback = make_tb_callback(exp_folder)
    lr_callback = make_lr_callback(params['lr_init'], params['lr_decay'],
                                   params['lr_decay_at_steps'])
    cp_callback = make_cp_callback(exp_folder, one_gpu_model)

    gpu_model.fit(lr_hr_ds,
                  epochs=params['epochs'],
                  steps_per_epoch=num_iter_per_epoch(n_data, params['batch_size']),
                  callbacks=[tb_callback, cp_callback, lr_callback],
                  initial_epoch=params['init_epoch'],
                  validation_data=val_lr_hr_ds,
                  validation_steps=n_val_data)

    one_gpu_model.save_weights(os.path.join(exp_folder, 'final_model.h5'))
    K.clear_session()

def train(self, fname, dataset, sess_info, epochs):
    (sess, saver) = sess_info
    f = open_file(fname)
    iterep = 500
    for i in range(iterep * epochs):
        batch = dataset.train.next_batch(100)
        sess.run(self.train_step, feed_dict={'x:0': batch})
        progbar(i, iterep)
        if (i + 1) % iterep == 0:
            a, b = sess.run(
                [self.nent, self.loss],
                feed_dict={
                    'x:0': dataset.train.data[np.random.choice(
                        len(dataset.train.data), 200)]
                })
            c, d = sess.run([self.nent, self.loss],
                            feed_dict={'x:0': dataset.test.data})
            a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
            e = test_acc(dataset, sess, self.qy_logit)
            string = ('{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc', 'epoch'))
            stream_print(f, string, i <= iterep)
            string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                      .format(a, b, c, d, e, int((i + 1) / iterep)))
            stream_print(f, string)
        # Saves parameters every 10 epochs
        if (i + 1) % (10 * iterep) == 0:
            print('saving')
            save_params(saver, sess, (i + 1) // iterep)
    if f is not None:
        f.close()

def run(self):
    """ Script for initializing training/testing process """
    global_step = tf.Variable(0, trainable=False)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    deconv_mult = lambda grads: list(map(
        lambda x: (x[0] * 1.0, x[1]) if 'deconv' in x[1].name else x, grads))
    grads = deconv_mult(optimizer.compute_gradients(self.loss))
    self.train_op = optimizer.apply_gradients(grads, global_step=global_step)
    tf.global_variables_initializer().run()

    # Load previous model checkpoint if it exists
    if self.load():
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if self.params:
        save_params(self.sess, self.model.model_params)
    elif self.train:
        # Train and test run sequentially
        self.run_train()
        self.run_test()
    else:
        self.run_test()

def main(num_epochs=20):
    print("Building model and compiling functions...")
    input_var = T.tensor4('inputs')
    fcae = build_fcae(input_var)
    output = nn.layers.get_output(fcae['output'])
    output_det = nn.layers.get_output(fcae['output'], deterministic=True)
    loss = nn.objectives.binary_crossentropy(output, input_var).mean()
    test_loss = nn.objectives.binary_crossentropy(output_det, input_var).mean()

    # ADAM updates
    params = nn.layers.get_all_params(fcae['output'], trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=1e-3)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], nn.layers.get_output(fcae['output']))

    data = u.DataH5PyStreamer(os.path.join(c.external_data, 'mnist.hdf5'),
                              batch_size=128)
    hist = u.train_with_hdf5(
        data, num_epochs=num_epochs,
        train_fn=train_fn, test_fn=val_fn,
        max_per_epoch=40,
        tr_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=0.),
        te_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=0.))
    u.save_params(fcae['output'],
                  'fcae_params_{}.npz'.format(np.asarray(hist)[-1, -1]))

    from PIL import Image
    from matplotlib import pyplot as plt
    streamer = data.streamer()
    imb = next(streamer.get_epoch_iterator())
    batch = u.raw_to_floatX(imb[0], pixel_shift=0.).transpose((0, 1, 3, 2))
    orig_dim = 28
    im = Image.new("RGB", (orig_dim * 20, orig_dim * 20))
    for j in xrange(10):
        dim = orig_dim
        orig_im = Image.fromarray(
            u.get_picture_array(batch, np.random.randint(batch.shape[0]), shift=0.0))
        im.paste(orig_im.resize((2 * orig_dim, 2 * orig_dim), Image.ANTIALIAS),
                 box=(0, j * orig_dim * 2))
        new_im = {}
        for i in xrange(9):
            new_im = orig_im.resize((dim, dim), Image.ANTIALIAS)
            new_im = ae_fn(
                u.arr_from_img(new_im, shift=0.).reshape(1, -1, dim, dim))
            new_im = Image.fromarray(u.get_picture_array(new_im, 0, shift=0.)) \
                .resize((orig_dim * 2, orig_dim * 2), Image.ANTIALIAS)
            im.paste(new_im, box=((i + 1) * orig_dim * 2, j * orig_dim * 2))
            dim = int(dim * 1.2)
    im.save('increasing_size_autoencoded.jpg')

def train(self, params, train, dev, test):
    start_time = time.time()
    counter = 0
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(train), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [train[t] for t in train_index]
                vocab = self.get_word_arr(batch)
                y = self.get_y(batch)
                x, xmask = self.prepare_data(self.populate_embeddings_words(batch, vocab))
                idxs = self.get_idxs(xmask)
                if params.nntype == "charlstm" or params.nntype == "charcnn":
                    char_indices = self.populate_embeddings_characters(vocab)
                if params.nntype == "charagram":
                    char_hash = self.populate_embeddings_characters_charagram(vocab)
                if params.nntype == "charlstm":
                    c, cmask = self.prepare_data(char_indices)
                if params.nntype == "charcnn":
                    c = self.prepare_data_conv(char_indices)
                if params.nntype == "charlstm":
                    cost = self.train_function(c, cmask, x, xmask, idxs, y)
                if params.nntype == "charcnn":
                    cost = self.train_function(c, x, xmask, idxs, y)
                if params.nntype == "charagram":
                    cost = self.train_function(char_hash, x, xmask, idxs, y)
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                #print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost
            if params.save:
                counter += 1
                utils.save_params(self, params.outfile + str(counter) + '.pickle')
            if params.evaluate:
                devscore = self.evaluate(dev, params)
                testscore = self.evaluate(test, params)
                trainscore = self.evaluate(train, params)
                print "accuracy: ", devscore, testscore, trainscore
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interrupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)

def save_params_to_file(self, fname: str):
    """
    Saves model parameters to file.

    :param fname: Path to save parameters to.
    """
    assert self._is_built
    utils.save_params(self.params.copy(), fname)
    logging.info('Saved params to "%s"', fname)

def main(specstr=default_specstr, z_dim=256, num_epochs=10, ch=3, init_from='',
         img_size=64, pxsh=0.5, data_file='', batch_size=8, save_to='params'):
    # build expressions for the output, loss, gradient
    input_var = T.tensor4('inputs')
    print('building specstr {} - zdim {}'.format(specstr, z_dim))
    cae = m.build_cae_nopoolinv(input_var, shape=(img_size, img_size), channels=ch,
                                specstr=specstr.format(z_dim))
    l_list = nn.layers.get_all_layers(cae)
    pred = nn.layers.get_output(cae)
    loss = nn.objectives.squared_error(pred, input_var.flatten(2)).mean()
    params = nn.layers.get_all_params(cae, trainable=True)
    grads = nn.updates.total_norm_constraint(T.grad(loss, params), 10)
    updates = nn.updates.adam(grads, params, learning_rate=1e-3)
    te_pred = nn.layers.get_output(cae, deterministic=True)
    te_loss = nn.objectives.squared_error(te_pred, input_var.flatten(2)).mean()

    # training functions
    print('compiling functions')
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], te_loss)

    # compile functions for encode/decode to test later
    enc_layer = l_list[next(i for i in xrange(len(l_list)) if l_list[i].name == 'encode')]
    enc_fn = theano.function([input_var],
                             nn.layers.get_output(enc_layer, deterministic=True))
    dec_fn = lambda z: nn.layers.get_output(cae, deterministic=True, inputs={
        l_list[0]: np.zeros((z.shape[0], ch, img_size, img_size),
                            dtype=theano.config.floatX),
        enc_layer: z
    }).eval().reshape(-1, ch, img_size, img_size)

    # load params if requested, run training
    if len(init_from) > 0:
        print('loading params from {}'.format(init_from))
        load_params(cae, init_from)
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    print('training for {} epochs'.format(num_epochs))
    hist = u.train_with_hdf5(
        data, num_epochs=num_epochs,
        train_fn=train_fn, test_fn=val_fn,
        tr_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=False),
        te_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    tg = u.raw_to_floatX(imb, pixel_shift=pxsh, square=True, center=True)
    pr = dec_fn(enc_fn(tg))
    for i in range(pr.shape[0]):
        u.get_image_pair(tg, pr, index=i, shift=pxsh).save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('cae_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(cae, os.path.join(save_to, 'cae_{}.npz'.format(hist[-1, -1])))

def callback_fn(self, params):
    print("Function value: ", end='')
    loss = objective_function(params, self.X, self.y, self.lambd)
    print(loss)
    print("Average gradient: ", end='')
    avg_grad = np.mean(
        d_optimization_function(params, self.X, self.y, self.lambd)**2)
    print(avg_grad)
    print()
    self.iters += 1
    save_params(params, self.filename, self.iters)
    save_losses(loss, self.loss_filename, self.iters)

def run(self):
    self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
    tf.global_variables_initializer().run()

    if self.load():
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if self.params:
        save_params(self.sess, self.model.weights, self.model.biases,
                    self.model.alphas, self.model.model_params)
    elif self.train:
        self.run_train()
    else:
        self.run_test()

def evaluate(eval_dir, method, train_features, train_labels, test_features,
             test_labels, **kwargs):
    if method == 'svm':
        acc_train, acc_test = svm(train_features, train_labels,
                                  test_features, test_labels)
    elif method == 'knn':
        acc_train, acc_test = knn(train_features, train_labels,
                                  test_features, test_labels, **kwargs)
    elif method == 'nearsub':
        acc_train, acc_test = nearsub(train_features, train_labels,
                                      test_features, test_labels, **kwargs)
    elif method == 'nearsub_pca':
        acc_train, acc_test = knn(train_features, train_labels,
                                  test_features, test_labels, **kwargs)
    acc_dict = {'train': acc_train, 'test': acc_test}
    utils.save_params(eval_dir, acc_dict, name=f'acc_{method}')

def run(self):
    # SGD with momentum
    # self.train_op = tf.train.MomentumOptimizer(self.learning_rate, self.momentum).minimize(self.loss)
    self.train_op = tf.train.AdadeltaOptimizer(
        learning_rate=self.learning_rate).minimize(self.loss)
    tf.initialize_all_variables().run()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if self.params:
        save_params(self.sess, self.weights, self.biases)
    elif self.train:
        self.run_train()
    else:
        self.run_test()

def run(self):
    self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
    tf.initialize_all_variables().run()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if self.params:
        s, d, m = self.model_params
        save_params(self.sess, self.weights, self.biases, self.alphas, s, d, m)
    elif self.train:
        self.run_train()
    else:
        self.run_test()

def main(L=2, img_size=64, pxsh=0., z_dim=32, n_hid=1024, num_epochs=12, binary='True',
         init_from='', data_file='', batch_size=128, save_to='params',
         max_per_epoch=-1):
    binary = binary.lower() == 'true'

    # Create VAE model
    input_var = T.tensor4('inputs')
    print("Building model and compiling functions...")
    print("L = {}, z_dim = {}, n_hid = {}, binary={}".format(L, z_dim, n_hid, binary))
    l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
        m.build_vcae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=n_hid)

    if len(init_from) > 0:
        print('loading from {}'.format(init_from))
        load_params(l_x, init_from)

    # compile functions
    loss, _ = u.build_vae_loss(input_var, *l_tup, deterministic=False, binary=binary, L=L)
    test_loss, test_prediction = u.build_vae_loss(input_var, *l_tup, deterministic=True,
                                                  binary=binary, L=L)
    params = nn.layers.get_all_params(l_x, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=3e-5)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], test_prediction)

    # run training loop
    print('training for {} epochs'.format(num_epochs))
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(
        data, num_epochs=num_epochs,
        train_fn=train_fn, test_fn=val_fn,
        max_per_epoch=max_per_epoch,
        tr_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=False),
        te_transform=lambda x: u.raw_to_floatX(x[0], pixel_shift=pxsh, center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    orig_images = u.raw_to_floatX(imb, pixel_shift=pxsh)
    autoencoded_images = ae_fn(orig_images)
    for i in range(autoencoded_images.shape[0]):
        u.get_image_pair(orig_images, autoencoded_images, index=i, shift=pxsh) \
            .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('vcae_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(l_x, os.path.join(save_to, 'vcae_{}.npz'.format(hist[-1, -1])))

def train(params=None):
    os.makedirs(params['ckpt_path'], exist_ok=True)
    device = torch.device("cuda")

    train_dataset = HDRDataset(params['dataset'], params=params,
                               suffix=params['dataset_suffix'])
    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'],
                              shuffle=True)

    model = HDRPointwiseNN(params=params)
    ckpt = get_latest_ckpt(params['ckpt_path'])
    if ckpt:
        print('Loading previous state:', ckpt)
        state_dict = torch.load(ckpt)
        state_dict, _ = load_params(state_dict)
        model.load_state_dict(state_dict)
    model.to(device)

    mseloss = torch.nn.MSELoss()
    optimizer = Adam(model.parameters(), params['lr'])

    count = 0
    for e in range(params['epochs']):
        model.train()
        for i, (low, full, target) in enumerate(train_loader):
            optimizer.zero_grad()
            low = low.to(device)
            full = full.to(device)
            t = target.to(device)
            res = model(low, full)

            total_loss = mseloss(res, t)
            total_loss.backward()

            if (count + 1) % params['log_interval'] == 0:
                _psnr = psnr(res, t).item()
                loss = total_loss.item()
                print(e, count, loss, _psnr)

            optimizer.step()

            if (count + 1) % params['ckpt_interval'] == 0:
                print('@@ MIN:', torch.min(res), 'MAX:', torch.max(res))
                model.eval().cpu()
                ckpt_model_filename = "ckpt_" + str(e) + '_' + str(count) + ".pth"
                ckpt_model_path = os.path.join(params['ckpt_path'], ckpt_model_filename)
                state = save_params(model.state_dict(), params)
                torch.save(state, ckpt_model_path)
                test(ckpt_model_path)
                model.to(device).train()
            count += 1

def run(self):
    # SGD with momentum
    # self.train_op = tf.train.MomentumOptimizer(self.learning_rate, self.momentum).minimize(self.loss)
    # Now we use the Adam optimizer instead of SGD (stochastic gradient descent)
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.loss)
    tf.initialize_all_variables().run()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if self.params:
        save_params(self.sess, self.weights, self.biases)
    elif self.train:
        self.run_train()
    else:
        self.run_test()

def run(self):
    global_step = tf.Variable(0, trainable=False)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    deconv_mult = lambda grads: list(map(
        lambda x: (x[0] * 1.0, x[1]) if 'deconv' in x[1].name else x, grads))
    grads = deconv_mult(optimizer.compute_gradients(self.loss))
    self.train_op = optimizer.apply_gradients(grads, global_step=global_step)
    tf.global_variables_initializer().run()

    if self.load():
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if self.params:
        save_params(self.sess, self.model.model_params)
    elif self.train:
        self.run_train()
    else:
        self.run_test()

def train(self):
    num_minibatches = len(self.mnist.train.x) / self.minibatch_size

    for epoch in xrange(self.num_epochs):
        for mb_index in xrange(num_minibatches):
            mb_x = self.mnist.train.x[mb_index : mb_index + self.minibatch_size]
            mb_x = mb_x.reshape((self.minibatch_size, 1, 28, 28))

            #E_h = crbm.E_h_given_x_func(mb_x)
            #print "Shape of E_h", E_h.shape

            cd_return = self.crbm.CD_step(mb_x)
            sp_return = self.crbm.sparsity_step(mb_x)

            self.series['cd'].append((epoch, mb_index), cd_return)
            self.series['sparsity'].append((epoch, mb_index), sp_return)

            total_idx = epoch * num_minibatches + mb_index

            if (total_idx + 1) % REDUCE_EVERY == 0:
                self.series['params'].append((epoch, mb_index), self.crbm.params)

            if total_idx % VISUALIZE_EVERY == 0:
                self.visualize_gibbs_result(
                    mb_x, GIBBS_STEPS_IN_VIZ_CHAIN,
                    "gibbs_chain_" + str(epoch) + "_" + str(mb_index))
                self.visualize_gibbs_result(
                    mb_x, 1, "gibbs_1_" + str(epoch) + "_" + str(mb_index))
                self.visualize_filters(
                    "filters_" + str(epoch) + "_" + str(mb_index))

        if TEST_CONFIG:
            # do a single epoch for cluster tests config
            break

    if SAVE_PARAMS:
        utils.save_params(self.crbm.params, "params.pkl")

def save_params(self, dir_path="", epoch=None):
    param_saving_path = save_params(dir_path=dir_path,
                                    name=self.name,
                                    epoch=epoch,
                                    params=self.params,
                                    aux_states=self.aux_states)
    misc_saving_path = save_misc(
        dir_path=dir_path, epoch=epoch, name=self.name,
        content={
            'data_shapes': {k: list(map(int, v))
                            for k, v in self.data_shapes.items()}
        })
    logging.info('Saving %s, params: \"%s\", misc: \"%s\"',
                 self.name, param_saving_path, misc_saving_path)

def index(request):
    db = Database('banco-notes')
    if request.startswith('POST'):
        params = save_params(request)
        # params = {'titulo': some value, 'detalhes': another value, 'id': another value}
        # add_to_jsonfile(params, 'notes.json')
        if 'deletar' in params.keys():
            db.delete(params['deletar'])  # id
        elif params['id'] == 'None':
            db.add(Note(title=params['titulo'], content=params['detalhes'],
                        id=params['id']))
        else:
            db.update(Note(title=params['titulo'], content=params['detalhes'],
                           id=params['id']))
        return build_response(code=303, reason='See Other', headers='Location: /')
    else:
        # Build a list of <li> elements, one for each note
        print('notas do banco: ', db.get_all())
        note_template = load_template('components/note.html')
        notes_li = [
            note_template.format(id=note.id, title=note.title, details=note.content)
            for note in db.get_all()
        ]
        notes = '\n'.join(notes_li)
        print('----------------------------------------')
        return build_response(load_template('index.html').format(notes=notes))

def main(data_name, vae_type, dimZ, dimH, n_iter, batch_size, K, checkpoint):
    dimY = 10
    if vae_type == 'A':
        from conv_generator_mnist_A import generator
    if vae_type == 'B':
        from conv_generator_mnist_B import generator
    if vae_type == 'C':
        from conv_generator_mnist_C import generator
    if vae_type == 'D':
        from conv_generator_mnist_D import generator
    if vae_type == 'E':
        from conv_generator_mnist_E import generator
    if vae_type == 'F':
        from conv_generator_mnist_F import generator
    if vae_type == 'G':
        from conv_generator_mnist_G import generator
    from conv_encoder_mnist import encoder_gaussian as encoder
    shape_high = (28, 28)
    input_shape = (28, 28, 1)
    n_channel = 64

    # then define model
    dec = generator(input_shape, dimH, dimZ, dimY, n_channel, 'sigmoid', 'gen')
    enc, enc_conv, enc_mlp = encoder(input_shape, dimH, dimZ, dimY, n_channel, 'enc')

    # define optimisers
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, ) + input_shape)
    Y_ph = tf.placeholder(tf.float32, shape=(batch_size, dimY))
    ll = 'l2'
    fit, eval_acc = construct_optimizer(X_ph, Y_ph, [enc_conv, enc_mlp], dec, ll, K,
                                        vae_type)

    # load data
    from utils_mnist import data_mnist
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=0, train_end=60000,
                                                  test_start=0, test_end=10000)

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    if not os.path.isdir('save/'):
        os.mkdir('save/')
        print('create path save/')
    path_name = data_name + '_conv_vae_%s/' % (vae_type + '_' + str(dimZ))
    if not os.path.isdir('save/' + path_name):
        os.mkdir('save/' + path_name)
        print('create path save/' + path_name)
    filename = 'save/' + path_name + 'checkpoint'
    if checkpoint < 0:
        print('training from scratch')
        init_variables(sess)
    else:
        load_params(sess, filename, checkpoint)
    checkpoint += 1

    # now start fitting
    n_iter_ = min(n_iter, 20)
    beta = 1.0
    for i in range(int(n_iter / n_iter_)):
        fit(sess, X_train, Y_train, n_iter_, lr, beta)
        # print training and test accuracy
        eval_acc(sess, X_test, Y_test, 'test')
        # save param values
        save_params(sess, filename, checkpoint)
        checkpoint += 1

                    type=str,
                    default='',
                    help='extra information to add to folder name')
parser.add_argument('--save_dir',
                    type=str,
                    default='./saved_models/',
                    help='base directory for saving PyTorch model. (default: ./saved_models/)')
args = parser.parse_args()

# pipeline setup
model_dir = os.path.join(args.save_dir, "iris",
                         "layers{}_eps{}_eta{}"
                         "".format(args.layers, args.eps, args.eta))
os.makedirs(model_dir, exist_ok=True)
utils.save_params(model_dir, vars(args))

# data setup
X_train, y_train, X_test, y_test, num_classes = dataset.load_Iris(0.3)

# model setup
layers = [Vector(args.layers, eta=args.eta, eps=args.eps)]
model = Architecture(layers, model_dir, num_classes)

# train/test pass
print("Forward pass - train features")
Z_train = model(X_train, y_train)
utils.save_loss(model.loss_dict, model_dir, "train")
print("Forward pass - test features")
Z_test = model(X_test)
utils.save_loss(model.loss_dict, model_dir, "test")

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    question_belong = []
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100, relabeling=args.relabeling,
                                       question_belong=question_belong)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling,
                                       question_belong=question_belong)
    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    #word_dict = utils.build_dict(train_examples[0] + train_examples[1] + train_examples[2], args.max_vocab_size)
    word_dict = pickle.load(open("../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    # EMBEDDING
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y = utils.vectorize(dev_examples, word_dict,
                                                    sort_by_len=not args.test_only,
                                                    concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, args.batch_size, args.concat)
    dev_acc, pred = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y = utils.vectorize(train_examples, word_dict,
                                                            concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_x3, train_y,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3,
                  mb_y) in enumerate(all_train):
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3, mb_y)
            if idx % 100 == 0:
                logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
                logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
                             (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1
            if n_updates % args.eval_iter == 0:
                samples = sorted(np.random.choice(args.num_train,
                                                  min(args.num_train, args.num_dev),
                                                  replace=False))
                sample_train = gen_examples(
                    [train_x1[k] for k in samples],
                    [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples for o in range(4)],
                    [train_y[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info('Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%' %
                                 (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, all_params, epoch=epoch,
                                      n_updates=n_updates)

                    help='load pretrained checkpoint for assigning labels')
parser.add_argument('--pretrain_epo', type=int, default=None,
                    help='load pretrained epoch for assigning labels')
args = parser.parse_args()


## Pipelines Setup
model_dir = os.path.join(
    args.save_dir,
    'seqsupce_{}+{}_cpb{}_epo{}_bs{}_lr{}_mom{}_wd{}_lcr{}{}'.format(
        args.arch, args.data, args.cpb, args.epo, args.bs, args.lr,
        args.mom, args.wd, args.lcr, args.tail))
headers = ["label_batch_id", "epoch", "step", "loss"]
utils.init_pipeline(model_dir, headers)
utils.save_params(model_dir, vars(args))


## per model functions
def lr_schedule(epoch, optimizer):
    """decrease the learning rate"""
    lr = args.lr
    if epoch >= 400:
        lr = args.lr * 0.01
    elif epoch >= 200:
        lr = args.lr * 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


## Prepare for Training
learning_rate = 0.01
n_train = len(y_train)
n_iters = args.epochs * n_train
print("Training:", args.epochs, "epochs of", n_train, "iterations")
train_loss = numpy.zeros(n_iters)
start_time = timeit.default_timer()
for epoch in range(args.epochs):
    for i in range(n_train):
        iteration = i + n_train * epoch
        train_loss[iteration] = train_model(
            numpy.asarray(X_train[i], dtype='int32'),
            numpy.asarray(y_train[i], dtype='int32'),
            learning_rate)
        # halve the learning rate whenever the loss increases w.r.t. the previous iteration
        if iteration > 0 and train_loss[iteration] > train_loss[iteration - 1]:
            learning_rate = learning_rate * 0.5
            print("Setting learning rate to {}".format(learning_rate))
        if iteration % args.print_interval == 0:
            print('epoch {}, minibatch {}/{}, train loss {}'.format(
                epoch, i, n_train, train_loss[iteration]))

numpy.save("train_loss_{}_{}_h{}_e{}".format(args.mode, args.model, args.hidden,
                                             args.epochs), train_loss)
numpy.save("index_{}".format(args.mode), index_)
print("Saved index to index_{}.npy. Saved train loss to train_loss_{}_{}_h{}_e{}.npy"
      .format(args.mode, args.mode, args.model, args.hidden, args.epochs))
save_params("params_{}_{}_h{}_e{}".format(args.mode, args.model, args.hidden,
                                          args.epochs), params)

def train(self, fname, dataset, sess_info, epochs, save_parameters=True,
          is_labeled=False):
    history = initialize_history()
    (sess, saver) = sess_info
    f = open_file(fname)
    iterep = 500
    for i in range(iterep * epochs):
        batch = dataset.train.next_batch(100)
        sess.run(self.train_step, feed_dict={'x:0': batch, 'phase:0': True})
        progbar(i, iterep)
        if (i + 1) % iterep == 0:
            a, b = sess.run(
                [self.nent, self.loss],
                feed_dict={
                    'x:0': dataset.train.data[np.random.choice(
                        len(dataset.train.data), 200)],
                    'phase:0': False
                })
            c, d = sess.run([self.nent, self.loss],
                            feed_dict={
                                'x:0': dataset.test.data,
                                'phase:0': False
                            })
            a, b, c, d = -a.mean(), b.mean(), -c.mean(), d.mean()
            e = (0, test_acc(dataset, sess, self.qy_logit))[is_labeled]
            string = ('{:>10s},{:>10s},{:>10s},{:>10s},{:>10s},{:>10s}'.format(
                'tr_ent', 'tr_loss', 't_ent', 't_loss', 't_acc', 'epoch'))
            stream_print(f, string, i <= iterep)
            string = ('{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10.2e},{:10d}'
                      .format(a, b, c, d, e, int((i + 1) / iterep)))
            stream_print(f, string)
            qy = sess.run(self.qy, feed_dict={
                'x:0': dataset.test.data,
                'phase:0': False
            })
            print('Sample of qy')
            print(qy[:5])
            history['iters'].append(int((i + 1) / iterep))
            history['ent'].append(a)
            history['val_ent'].append(c)
            history['loss'].append(b)
            history['val_loss'].append(d)
            history['val_acc'].append(e)
        # Saves parameters every 10 epochs
        if (i + 1) % (10 * iterep) == 0 and save_parameters:
            print('saving')
            save_params(saver, sess, (i + 1) // iterep)
    if f is not None:
        f.close()
    return history

}
param_set_2 = {
    'sr': 44100,
    'window_size': 50,
    'hop_length': 2205,
    'server_subpaths': 'All'
}
param_set_3 = {
    'sr': 44100,
    'window_size': 50,
    'hop_length': 2205,
    'server_subpaths': 'FINAL_East African Popular Music Archive'
}

# Set the hop length; at 22050 Hz, 512 samples ~= 23ms
# at 44100Hz, for 50ms use 2205 as hop_length
save_params(preproc_path, **param_set_1)
save_params(preproc_path, **param_set_2)
save_params(preproc_path, **param_set_3)

# ----- Feature extraction ----- #
user_confirmation()

# Define possible parameters for feature extraction
for method in ['mfcc', 'spectrogram']:
    for duration in [5, 10]:
        save_params(feature_ext_path, method=method, duration=duration)

# Define a set of preprocessing parameters and a set of feature extraction parameters to use
preproc_params = 3
feature_ext_params = 1
params_list = [preproc_params, feature_ext_params]

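# The hop-length comment above is plain sample-rate arithmetic:
# hop_length = sample_rate * window_ms / 1000.
# A minimal sketch of that conversion; the helper name `hop_length_for` is
# hypothetical and not part of the script above.
def hop_length_for(sr, window_ms):
    """Convert a window length in milliseconds to a hop length in samples."""
    return int(sr * window_ms / 1000)

# 22050 Hz * ~23 ms ~= 512 samples; 44100 Hz * 50 ms = 2205 samples
assert hop_length_for(44100, 50) == 2205
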
def main(n_hid=256, lstm_layers=2, num_epochs=100, batch_size=32, save_to='output',
         max_per_epoch=-1):
    # load current set of words used
    words = open(c.words_used_file, 'r').readlines()
    idx_to_words = dict((i + 1, w.strip()) for i, w in enumerate(words))
    idx_to_words[0] = '<e>'
    word_dim = len(words) + 1

    # normalization expected by vgg-net
    mean_values = np.array([104, 117, 123]).reshape((3, 1, 1)).astype(theano.config.floatX)

    # build function for extracting convolutional features
    img_var = T.tensor4('images')
    net = m.build_vgg(shape=(c.img_size, c.img_size), input_var=img_var)
    values = pickle.load(open(c.vgg_weights))['param values']
    nn.layers.set_all_param_values(net['pool5'], values)
    conv_feats = theano.function([img_var], nn.layers.get_output(net['pool5']))
    conv_shape = nn.layers.get_output_shape(net['pool5'])

    # helper function for converting word vector to one-hot
    raw_word_var = T.matrix('seq_raw')
    one_hot = theano.function([raw_word_var], nn.utils.one_hot(raw_word_var, m=word_dim))

    # build expressions for lstm
    conv_feats_var = T.tensor4('conv')
    seq_var = T.tensor3('seq')
    lstm = m.build_rnn(conv_feats_var, seq_var, conv_shape, word_dim, n_hid, lstm_layers)
    output = nn.layers.get_output(lstm['output'])
    output_det = nn.layers.get_output(lstm['output'], deterministic=True)
    loss = m.categorical_crossentropy_logdomain(output, seq_var).mean()
    te_loss = m.categorical_crossentropy_logdomain(output_det, seq_var).mean()

    # compile training functions
    params = nn.layers.get_all_params(lstm['output'], trainable=True)
    lr = theano.shared(nn.utils.floatX(1e-3))
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([conv_feats_var, seq_var], loss, updates=updates)
    test_fn = theano.function([conv_feats_var, seq_var], te_loss)
    predict_fn = theano.function([conv_feats_var, seq_var], T.exp(output_det[:, -1:]))

    zeros = np.zeros((batch_size, 1, word_dim), dtype=theano.config.floatX)

    def transform_data(imb):
        y, x = imb
        # data augmentation: flip = -1 if we do flip over y-axis, 1 if not
        flip = -2 * np.random.binomial(1, p=0.5) + 1
        # this vgg-net expects image values that are normalized by mean but not magnitude
        x = (u.raw_to_floatX(x[:, :, ::flip], pixel_shift=0.)
             .transpose(0, 1, 3, 2)[:, ::-1] * 255. - mean_values)
        return conv_feats(x), np.concatenate([zeros, one_hot(y)], axis=1)

    data = u.DataH5PyStreamer(c.twimg_hdf5_file, batch_size=batch_size)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs,
                             train_fn=train_fn, test_fn=test_fn,
                             max_per_epoch=max_per_epoch,
                             tr_transform=transform_data, te_transform=transform_data)
    np.savetxt('lstm_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(lstm['output'],
                  os.path.join(save_to, 'lstm_{}.npz'.format(np.asarray(hist)[-1, -1])))

    # generate some example captions for one batch of images
    streamer = data.streamer(training=False, shuffled=True)
    y_raw, x_raw = next(streamer.get_epoch_iterator())
    x, _ = transform_data((y_raw, x_raw))
    y = zeros
    captions = []
    for idx in xrange(y.shape[0]):
        captions.append([])
    idx_to_words[0] = '<e>'
    for sample_num in xrange(c.max_caption_len):
        pred = predict_fn(x, y)
        new_y = []
        for idx in xrange(pred.shape[0]):
            # reduce size by a small factor to prevent numerical imprecision from
            # making it sum to > 1.
            # reverse it so that <e> gets the additional probability, not a word
            sample = np.random.multinomial(1, pred[idx, 0, ::-1] * .999999)[::-1]
            captions[idx].append(idx_to_words[np.argmax(sample)])
            new_y.append(sample)
        new_y = np.vstack(new_y).reshape(-1, 1, word_dim).astype(theano.config.floatX)
        y = np.concatenate([y, new_y], axis=1)
    captions = ['{},{}\n'.format(i, ' '.join(cap)) for i, cap in enumerate(captions)]
    with open(os.path.join(save_to, 'captions_sample.csv'), 'w') as wr:
        wr.writelines(captions)
    for idx in xrange(x_raw.shape[0]):
        Image.fromarray(x_raw[idx].transpose(2, 1, 0)).save(
            os.path.join(save_to, 'ex_{}.jpg'.format(idx)))

def main(data_file='', num_epochs=10, batch_size=128, L=2, z_dim=256, n_hid=1500,
         binary='false', img_size=64, init_from='', save_to='params',
         split_layer='conv7', pxsh=0.5, specstr=c.pf_cae_specstr,
         cae_weights=c.pf_cae_params, deconv_weights=c.pf_deconv_params):
    binary = binary.lower() == 'true'

    # pre-trained function for extracting convolutional features from images
    cae = m.build_cae(input_var=None, specstr=specstr, shape=(img_size, img_size))
    laydict = dict((l.name, l) for l in nn.layers.get_all_layers(cae))
    convshape = nn.layers.get_output_shape(laydict[split_layer])
    convs_from_img, _ = m.encoder_decoder(cae_weights, specstr=specstr,
                                          layersplit=split_layer,
                                          shape=(img_size, img_size))
    # pre-trained function for returning to images from convolutional features
    img_from_convs = m.deconvoluter(deconv_weights, specstr=specstr, shape=convshape)

    # Create VAE model
    print("Building model and compiling functions...")
    print("L = {}, z_dim = {}, n_hid = {}, binary={}".format(L, z_dim, n_hid, binary))
    input_var = T.tensor4('inputs')
    c, w, h = convshape[1], convshape[2], convshape[3]
    l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \
        m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=n_hid,
                    shape=(w, h), channels=c)

    if len(init_from) > 0:
        print("loading from {}".format(init_from))
        u.load_params(l_x, init_from)

    # build loss, updates, training, prediction functions
    loss, _ = u.build_vae_loss(input_var, *l_tup, deterministic=False, binary=binary, L=L)
    test_loss, test_prediction = u.build_vae_loss(input_var, *l_tup, deterministic=True,
                                                  binary=binary, L=L)
    lr = theano.shared(nn.utils.floatX(1e-5))
    params = nn.layers.get_all_params(l_x, trainable=True)
    updates = nn.updates.adam(loss, params, learning_rate=lr)
    train_fn = theano.function([input_var], loss, updates=updates)
    val_fn = theano.function([input_var], test_loss)
    ae_fn = theano.function([input_var], test_prediction)

    # run training loop
    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center)
        return convs_from_img(floatx_ims)

    print("training for {} epochs".format(num_epochs))
    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(data, num_epochs=num_epochs,
                             train_fn=train_fn, test_fn=val_fn,
                             tr_transform=lambda x: data_transform(x[0], do_center=False),
                             te_transform=lambda x: data_transform(x[0], do_center=True))

    # generate examples, save training history
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    orig_feats = data_transform(imb, do_center=True)
    reconstructed_feats = ae_fn(orig_feats).reshape(orig_feats.shape)
    orig_feats_deconv = img_from_convs(orig_feats)
    reconstructed_feats_deconv = img_from_convs(reconstructed_feats)
    for i in range(reconstructed_feats_deconv.shape[0]):
        u.get_image_pair(orig_feats_deconv, reconstructed_feats_deconv,
                         index=i, shift=pxsh).save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('vae_convs_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(l_x, os.path.join(save_to, 'vae_convs_{}.npz'.format(hist[-1, -1])))

def main(data_name, vae_type, fea_layer, n_iter, batch_size, K, checkpoint, data_path):
    # load data
    from import_data_cifar10 import load_data_cifar10
    X_train, X_test, Y_train, Y_test = load_data_cifar10(data_path, conv=True)
    dimY = Y_train.shape[1]

    if vae_type == 'E':
        from mlp_generator_cifar10_E import generator
    if vae_type == 'F':
        from mlp_generator_cifar10_F import generator
    if vae_type == 'G':
        from mlp_generator_cifar10_G import generator
    from mlp_encoder_cifar10 import encoder_gaussian as encoder

    # first build the feature extractor
    input_shape = X_train[0].shape
    sys.path.append('test_attacks/load/')
    from vgg_cifar10 import cifar10vgg
    cnn = cifar10vgg(path='test_attacks/load/vgg_model/', train=False)
    if fea_layer == 'low':
        N_layer = 16
    if fea_layer == 'mid':
        N_layer = 36
    if fea_layer == 'high':
        N_layer = len(cnn.model.layers) - 5

    for layer in cnn.model.layers:
        print(layer.__class__.__name__)

    def feature_extractor(x):
        out = cnn.normalize_production(x * 255.0)
        for i in range(N_layer):
            out = cnn.model.layers[i](out)
        return out

    print(fea_layer, N_layer, cnn.model.layers[N_layer - 1].__class__.__name__,
          cnn.model.layers[N_layer - 1].get_config())

    # then define model
    X_ph = tf.placeholder(tf.float32, shape=(batch_size, ) + input_shape)
    Y_ph = tf.placeholder(tf.float32, shape=(batch_size, dimY))
    dimZ = 128  # 32
    dimH = 1000
    fea_op = feature_extractor(X_ph)
    if len(fea_op.get_shape().as_list()) == 4:
        fea_op = tf.reshape(fea_op, [batch_size, -1])
    dimF = fea_op.get_shape().as_list()[-1]
    dec = generator(dimF, dimH, dimZ, dimY, 'linear', 'gen')
    n_layers_enc = 2
    enc = encoder(dimF, dimH, dimZ, dimY, n_layers_enc, 'enc')
    ll = 'l2'
    identity = lambda x: x
    fea_ph = tf.placeholder(tf.float32, shape=(batch_size, dimF))
    fit, eval_acc = construct_optimizer(fea_ph, Y_ph, [identity, enc], dec, ll, K,
                                        vae_type)

    # initialise sessions
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    path_name = data_name + '_conv_vae_fea_%s_%s/' % (vae_type, fea_layer)
    if not os.path.isdir('save/' + path_name):
        os.mkdir('save/' + path_name)
        print('create path save/' + path_name)
    filename = 'save/' + path_name + 'checkpoint'
    if checkpoint < 0:
        print('training from scratch')
        init_variables(sess)
    else:
        load_params(sess, filename, checkpoint)
    checkpoint += 1

    # set test phase
    import keras.backend
    keras.backend.set_session(sess)
    cnnfile = 'test_attacks/load/vgg_model/cifar10vgg.h5'
    cnn.model.load_weights(cnnfile)
    print('load weight from', cnnfile)
    keras.backend.set_learning_phase(0)

    # extract features
    def gen_feature(X):
        F = []
        for i in range(int(X.shape[0] / batch_size)):
            batch = X[i * batch_size:(i + 1) * batch_size]
            F.append(sess.run(fea_op, feed_dict={X_ph: batch}))
        return np.concatenate(F, axis=0)

    F_train = gen_feature(X_train)
    F_test = gen_feature(X_test)

    # now start fitting
    beta = 1.0
    n_iter_ = 10
    for i in range(int(n_iter / n_iter_)):
        fit(sess, F_train, Y_train, n_iter_, lr, beta)
        # print training and test accuracy
        eval_acc(sess, F_test, Y_test, 'test', beta)
        # save param values
        save_params(sess, filename, checkpoint, scope='vae')
        checkpoint += 1

def main(data_file='', img_size=64, num_epochs=10, batch_size=128, pxsh=0.5,
         split_layer='conv7', specstr=c.pf_cae_specstr, cae_params=c.pf_cae_params,
         save_to='params'):
    # transform function to go from images -> conv feats
    conv_feats, _ = m.encoder_decoder(cae_params, specstr=specstr,
                                      layersplit=split_layer,
                                      shape=(img_size, img_size))

    # build pretrained net for images -> convfeats in order to get the input shape
    # for the reverse function
    print('compiling functions')
    conv_net = m.build_cae(input_var=None, specstr=specstr, shape=(img_size, img_size))
    cae_layer_dict = dict((l.name, l) for l in nn.layers.get_all_layers(conv_net))
    shape = nn.layers.get_output_shape(cae_layer_dict[split_layer])

    # build net for convfeats -> images
    imgs_var = T.tensor4('images')
    convs_var = T.tensor4('conv_features')
    deconv_net = m.build_deconv_net(input_var=convs_var, shape=shape, specstr=specstr)
    loss = nn.objectives.squared_error(imgs_var,
                                       nn.layers.get_output(deconv_net)).mean()
    te_loss = nn.objectives.squared_error(
        imgs_var, nn.layers.get_output(deconv_net, deterministic=True)).mean()
    params = nn.layers.get_all_params(deconv_net, trainable=True)
    lr = theano.shared(nn.utils.floatX(3e-3))
    updates = nn.updates.adam(loss, params, learning_rate=lr)

    # compile functions
    train_fn = theano.function([convs_var, imgs_var], loss, updates=updates)
    val_fn = theano.function([convs_var, imgs_var], te_loss)
    deconv_fn = theano.function([convs_var],
                                nn.layers.get_output(deconv_net, deterministic=True))

    # run training loop
    print("training for {} epochs".format(num_epochs))

    def data_transform(x, do_center):
        floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center)
        return (conv_feats(floatx_ims), floatx_ims)

    data = u.DataH5PyStreamer(data_file, batch_size=batch_size)
    hist = u.train_with_hdf5(
        data, num_epochs=num_epochs,
        train_fn=train_fn, test_fn=val_fn,
        tr_transform=lambda x: data_transform(x[0], do_center=False),
        te_transform=lambda x: data_transform(x[0], do_center=True))

    # generate examples, save training history and params
    te_stream = data.streamer(shuffled=True)
    imb, = next(te_stream.get_epoch_iterator())
    imb = data_transform(imb, True)[0]
    result = deconv_fn(imb)
    for i in range(result.shape[0]):
        Image.fromarray(u.get_picture_array(result, index=i, shift=pxsh)) \
            .save('output_{}.jpg'.format(i))
    hist = np.asarray(hist)
    np.savetxt('deconv_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f')
    u.save_params(deconv_net,
                  os.path.join(save_to, 'deconv_{}.npz'.format(hist[-1, -1])))

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100, relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling)
    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(
        set([w for w in word_dict.keys() if w.startswith('@entity')] +
            train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')
    if args.prepare_model:
        return train_fn, test_fn, params

    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(train_examples,
                                                           word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_l, train_y, args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y) in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y)
            logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
                         (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1
            if n_updates % args.eval_iter == 0:
                samples = sorted(np.random.choice(args.num_train,
                                                  min(args.num_train, args.num_dev),
                                                  replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' % eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info('Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%' %
                                 (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, params, epoch=epoch,
                                      n_updates=n_updates)

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if not args.test_only:
        logging.info('*' * 10 + ' All')
        all_examples = utils.load_data(args.all_file, 100, relabeling=args.relabeling)
        dev_ratio = args.dev_ratio
        sample_index = np.arange(len(all_examples[0]))
        random.seed(1000)
        dev_index = random.sample(sample_index, int(dev_ratio * len(sample_index)))
        train_index = np.setdiff1d(sample_index, dev_index)
        dev_examples = tuple_part(all_examples, dev_index)
        train_examples = tuple_part(all_examples, train_index)

        # feature preprocessing
        train_fea_flat_np = FeaExtract(train_examples[-1])
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        train_fea_flat_np2, dev_fea_flat_np2 = Prepocessing_func(
            train_fea_flat_np, dev_fea_flat_np, varian_ratio_tol=args.pca_ratio)
        train_fea_merge = FeaMerge(train_fea_flat_np2, train_examples[-1])
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        train_examples = train_examples[:-1] + (train_fea_merge, )
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )
        args.num_train = len(train_examples[0])
    else:
        # logging.info('*' * 10 + ' Train')
        # train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling)
        dev_fea_flat_np = FeaExtract(dev_examples[-1])
        dev_fea_flat_np2 = PrepocessingApply_func(dev_fea_flat_np)
        dev_fea_merge = FeaMerge(dev_fea_flat_np2, dev_examples[-1])
        dev_examples = dev_examples[:-1] + (dev_fea_merge, )

    args.num_dev = len(dev_examples[0])
    args.mea_num = dev_examples[4][0].shape[-1]

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = pickle.load(open("../../obj/dict.pkl", "rb"))
    logging.info('-' * 50)
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params, all_params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_x3, dev_y, dev_x4 = utils.vectorize(
        dev_examples, word_dict, sort_by_len=not args.test_only, concat=args.concat)
    word_dict_r = {}
    word_dict_r[0] = "unk"
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_x3, dev_y, dev_x4,
                           args.batch_size, args.concat)
    dev_acc, prediction = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc.mean())
    print(dev_acc.mean())
    best_dev_acc = dev_acc
    best_train_acc = 0
    if args.test_only:
        return dev_acc, best_train_acc
    utils.save_params(args.model_file, all_params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_x3, train_y, train_x4 = utils.vectorize(
        train_examples, word_dict, concat=args.concat)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_x3, train_y, train_x4,
                             args.batch_size, args.concat)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_x3, mb_mask3, mb_y,
                  mb_x4, mb_mask4) in enumerate(all_train):
            train_loss = train_fn(mb_x1, mb_mask1, mb_x3, mb_mask3, mb_y, mb_x4)
            # if idx % 100 == 0:
            # if epoch % 100 == 0:
            #     logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            #     logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
            #                  (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1
            if n_updates % args.eval_iter == 0:
                print([x.get_value() for x in params])
                print([x.get_value() for x in all_params])
                samples = sorted(np.random.choice(args.num_train,
                                                  min(args.num_train, args.num_dev),
                                                  replace=False))
                sample_train = gen_examples(
                    [train_x1[k] for k in samples],
                    [train_x2[k] for k in samples],
                    [train_x3[k * 4 + o] for k in samples for o in range(4)],
                    [train_y[k] for k in samples],
                    [train_x4[k] for k in samples],
                    args.batch_size, args.concat)
                acc, pred = eval_acc(test_fn, sample_train)
                logging.info('Train accuracy: %.2f %%' % acc)
                train_acc, pred = eval_acc(test_fn, all_train)
                logging.info('train accuracy: %.2f %%' % train_acc)
                dev_acc, pred = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_dev_acc:
                    best_dev_acc = dev_acc
                    logging.info('Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%' %
                                 (epoch, n_updates, best_dev_acc))
                    best_train_acc = acc
                    logging.info('Best train accuracy: epoch = %d, n_updates = %d, acc = %.2f %%' %
                                 (epoch, n_updates, best_train_acc))
                    utils.save_params(args.model_file, all_params, epoch=epoch,
                                      n_updates=n_updates)
    return best_dev_acc, best_train_acc

                          weight_decay=optim_params["weight_decay"])
elif "SGD" == optim_params["name"]:
    optimizer = optim.SGD(net.get_params_lr(
        lr_not_pretrained=optim_params["lr_not_pretrained"],
        lr_pretrained=optim_params["lr_pretrained"]),
        momentum=optim_params["momentum"],
        weight_decay=optim_params["weight_decay"])

# Training
train_net(net, train_loader, test_loader, optimizer=optimizer,
          loss_fn=loss_fn, epochs=params["epochs"], device=device)

# Inference
y, ypred = eval_net(net, test_loader, probability=True, device=device)

# Append the accuracy and the network weights to the lists
ys.append(y.cpu().numpy())
ypreds.append(ypred.cpu().numpy())
recall = recall_score(ys[-1], ypreds[-1].argmax(1), average=None,
                      zero_division=0) * 100
print("Per-class recall on the test set:\n{}\nMean: {}".format(
    np.round(recall, decimals=1), np.round(recall.mean(), decimals=1)))
net_weights.append(net.cpu().state_dict())

utils.print_result(params, ys, ypreds)
utils.save_params(params, net_weights)

def main(args):
    logging.info('-' * 50)
    logging.info('Load data files..')
    if args.debug:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, 100, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, 100, relabeling=args.relabeling)
    else:
        logging.info('*' * 10 + ' Train')
        train_examples = utils.load_data(args.train_file, relabeling=args.relabeling)
        logging.info('*' * 10 + ' Dev')
        dev_examples = utils.load_data(args.dev_file, args.max_dev,
                                       relabeling=args.relabeling)
    args.num_train = len(train_examples[0])
    args.num_dev = len(dev_examples[0])

    logging.info('-' * 50)
    logging.info('Build dictionary..')
    word_dict = utils.build_dict(train_examples[0] + train_examples[1])
    entity_markers = list(
        set([w for w in word_dict.keys() if w.startswith('@entity')] +
            train_examples[2]))
    entity_markers = ['<unk_entity>'] + entity_markers
    entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
    logging.info('Entity markers: %d' % len(entity_dict))
    args.num_labels = len(entity_dict)

    logging.info('-' * 50)
    # Load embedding file
    embeddings = utils.gen_embeddings(word_dict, args.embedding_size, args.embedding_file)
    (args.vocab_size, args.embedding_size) = embeddings.shape
    logging.info('Compile functions..')
    train_fn, test_fn, params = build_fn(args, embeddings)
    logging.info('Done.')
    logging.info('-' * 50)
    logging.info(args)

    logging.info('-' * 50)
    logging.info('Initial test..')
    dev_x1, dev_x2, dev_l, dev_y = utils.vectorize(dev_examples, word_dict, entity_dict)
    assert len(dev_x1) == args.num_dev
    all_dev = gen_examples(dev_x1, dev_x2, dev_l, dev_y, args.batch_size)
    dev_acc = eval_acc(test_fn, all_dev)
    logging.info('Dev accuracy: %.2f %%' % dev_acc)
    best_acc = dev_acc
    if args.test_only:
        return
    utils.save_params(args.model_file, params, epoch=0, n_updates=0)

    # Training
    logging.info('-' * 50)
    logging.info('Start training..')
    train_x1, train_x2, train_l, train_y = utils.vectorize(train_examples,
                                                           word_dict, entity_dict)
    assert len(train_x1) == args.num_train
    start_time = time.time()
    n_updates = 0
    all_train = gen_examples(train_x1, train_x2, train_l, train_y, args.batch_size)
    for epoch in range(args.num_epoches):
        np.random.shuffle(all_train)
        for idx, (mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y) in enumerate(all_train):
            logging.info('#Examples = %d, max_len = %d' % (len(mb_x1), mb_x1.shape[1]))
            train_loss = train_fn(mb_x1, mb_mask1, mb_x2, mb_mask2, mb_l, mb_y)
            logging.info('Epoch = %d, iter = %d (max = %d), loss = %.2f, elapsed time = %.2f (s)' %
                         (epoch, idx, len(all_train), train_loss, time.time() - start_time))
            n_updates += 1
            if n_updates % args.eval_iter == 0:
                samples = sorted(np.random.choice(args.num_train,
                                                  min(args.num_train, args.num_dev),
                                                  replace=False))
                sample_train = gen_examples([train_x1[k] for k in samples],
                                            [train_x2[k] for k in samples],
                                            train_l[samples],
                                            [train_y[k] for k in samples],
                                            args.batch_size)
                logging.info('Train accuracy: %.2f %%' % eval_acc(test_fn, sample_train))
                dev_acc = eval_acc(test_fn, all_dev)
                logging.info('Dev accuracy: %.2f %%' % dev_acc)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    logging.info('Best dev accuracy: epoch = %d, n_updates = %d, acc = %.2f %%' %
                                 (epoch, n_updates, dev_acc))
                    utils.save_params(args.model_file, params, epoch=epoch,
                                      n_updates=n_updates)

def main(data_name, method, dimZ, dimH, n_channel, batch_size, K_mc, checkpoint, lbd): # set up dataset specific stuff from config import config labels, n_iter, dimX, shape_high, ll = config(data_name, n_channel) if data_name == 'mnist': from mnist import load_mnist if data_name == 'notmnist': from notmnist import load_notmnist # import functionalities if method == 'onlinevi': from bayesian_generator import generator_head, generator_shared, \ generator, construct_gen from onlinevi import construct_optimizer, init_shared_prior, \ update_shared_prior, update_q_sigma if method in ['ewc', 'noreg', 'laplace', 'si']: from generator import generator_head, generator_shared, generator, construct_gen if method in ['ewc', 'noreg']: from vae_ewc import construct_optimizer, lowerbound if method == 'ewc': from vae_ewc import update_ewc_loss, compute_fisher if method == 'laplace': from vae_laplace import construct_optimizer, lowerbound from vae_laplace import update_laplace_loss, compute_fisher, init_fisher_accum if method == 'si': from vae_si import construct_optimizer, lowerbound, update_si_reg # then define model n_layers_shared = 2 batch_size_ph = tf.placeholder(tf.int32, shape=(), name='batch_size') dec_shared = generator_shared(dimX, dimH, n_layers_shared, 'sigmoid', 'gen') # initialise sessions config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) string = method if method in ['ewc', 'laplace', 'si']: string = string + '_lbd%.1f' % lbd if method == 'onlinevi' and K_mc > 1: string = string + '_K%d' % K_mc path_name = data_name + '_%s/' % string if not os.path.isdir('save/'): os.mkdir('save/') if not os.path.isdir('save/' + path_name): os.mkdir('save/' + path_name) print 'create path save/' + path_name filename = 'save/' + path_name + 'checkpoint' if checkpoint < 0: print 'training from scratch' old_var_list = init_variables(sess) else: load_params(sess, filename, checkpoint) checkpoint += 1 # visualise the samples N_gen = 10**2 path = 'figs/' + path_name if not os.path.isdir('figs/'): os.mkdir('figs/') if not os.path.isdir(path): os.mkdir(path) print 'create path ' + path X_ph = tf.placeholder(tf.float32, shape=(batch_size, dimX), name='x_ph') # now start fitting N_task = len(labels) gen_ops = [] X_valid_list = [] X_test_list = [] eval_func_list = [] result_list = [] if method == 'onlinevi': shared_prior_params = init_shared_prior() if method in ['ewc', 'noreg']: ewc_loss = 0.0 if method == 'laplace': F_accum = init_fisher_accum() laplace_loss = 0.0 if method == 'si': old_params_shared = None si_reg = None n_layers_head = 2 n_layers_enc = n_layers_shared + n_layers_head - 1 for task in xrange(1, N_task + 1): # first load data if data_name == 'mnist': X_train, X_test, _, _ = load_mnist(digits=labels[task - 1], conv=False) if data_name == 'notmnist': X_train, X_test, _, _ = load_notmnist(data_path, digits=labels[task - 1], conv=False) N_train = int(X_train.shape[0] * 0.9) X_valid_list.append(X_train[N_train:]) X_train = X_train[:N_train] X_test_list.append(X_test) # define the head net and the generator ops dec = generator( generator_head(dimZ, dimH, n_layers_head, 'gen_%d' % task), dec_shared) enc = encoder(dimX, dimH, dimZ, n_layers_enc, 'enc_%d' % task) gen_ops.append(construct_gen(dec, dimZ, sampling=False)(N_gen)) print 'construct eval function...' eval_func_list.append(construct_eval_func(X_ph, enc, dec, ll, \ batch_size_ph, K=100, sample_W=False)) # then construct loss func and fit func print 'construct fit function...' 
if method == 'onlinevi': fit = construct_optimizer(X_ph, enc, dec, ll, X_train.shape[0], batch_size_ph, \ shared_prior_params, task, K_mc) if method in ['ewc', 'noreg']: bound = lowerbound(X_ph, enc, dec, ll) fit = construct_optimizer(X_ph, batch_size_ph, bound, X_train.shape[0], ewc_loss) if method == 'ewc': fisher, var_list = compute_fisher(X_ph, batch_size_ph, bound, X_train.shape[0]) if method == 'laplace': bound = lowerbound(X_ph, enc, dec, ll) fit = construct_optimizer(X_ph, batch_size_ph, bound, X_train.shape[0], laplace_loss) fisher, var_list = compute_fisher(X_ph, batch_size_ph, bound, X_train.shape[0]) if method == 'si': bound = lowerbound(X_ph, enc, dec, ll) fit, shared_var_list = construct_optimizer(X_ph, batch_size_ph, bound, X_train.shape[0], si_reg, old_params_shared, lbd) if old_params_shared is None: old_params_shared = sess.run(shared_var_list) # initialise all the uninitialised stuff old_var_list = init_variables(sess, old_var_list) # start training for each task if method == 'si': new_params_shared, w_params_shared = fit(sess, X_train, n_iter, lr) else: fit(sess, X_train, n_iter, lr) # plot samples x_gen_list = sess.run(gen_ops, feed_dict={batch_size_ph: N_gen}) for i in xrange(len(x_gen_list)): plot_images(x_gen_list[i], shape_high, path, \ data_name + '_gen_task%d_%d' % (task, i + 1)) x_list = [x_gen_list[i][:1] for i in xrange(len(x_gen_list))] x_list = np.concatenate(x_list, 0) tmp = np.zeros([10, dimX]) tmp[:task] = x_list if task == 1: x_gen_all = tmp else: x_gen_all = np.concatenate([x_gen_all, tmp], 0) # print test-ll on all tasks tmp_list = [] for i in xrange(len(eval_func_list)): print 'task %d' % (i + 1), test_ll = eval_func_list[i](sess, X_valid_list[i]) tmp_list.append(test_ll) result_list.append(tmp_list) # save param values save_params(sess, filename, checkpoint) checkpoint += 1 # update regularisers/priors if method == 'ewc': # update EWC loss print 'update ewc loss...' X_batch = X_train[np.random.permutation(range( X_train.shape[0]))[:batch_size]] ewc_loss = update_ewc_loss(sess, ewc_loss, var_list, fisher, lbd, X_batch) if method == 'laplace': # update Laplace loss print 'update laplace loss...' X_batch = X_train[np.random.permutation(range( X_train.shape[0]))[:batch_size]] laplace_loss, F_accum = update_laplace_loss( sess, F_accum, var_list, fisher, lbd, X_batch) if method == 'onlinevi': # update prior print 'update prior...' shared_prior_params = update_shared_prior(sess, shared_prior_params) # reset the variance of q update_q_sigma(sess) if method == 'si': # update regularisers/priors print 'update SI big omega matrices...' si_reg, _ = update_si_reg(sess, si_reg, new_params_shared, old_params_shared, w_params_shared) old_params_shared = new_params_shared plot_images(x_gen_all, shape_high, path, data_name + '_gen_all') for i in xrange(len(result_list)): print result_list[i] # save results fname = 'results/' + data_name + '_%s.pkl' % string import pickle pickle.dump(result_list, open(fname, 'wb')) print 'test-ll results saved in', fname
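# update_ewc_loss is imported from vae_ewc and is not shown in this file. For orientation
# only, here is a hedged sketch of the standard EWC quadratic penalty it presumably
# accumulates after each task: lbd/2 * sum_i F_i * (theta_i - theta*_i)^2, where theta* are
# the parameter values frozen at the end of the task and F is a diagonal Fisher estimate.
# The signature mirrors the call above, but the 'x_ph:0' feed key and the assumption that
# the fisher ops evaluate to per-variable numpy arrays are guesses, not the actual code.
import tensorflow as tf


def update_ewc_loss_sketch(sess, prev_ewc_loss, var_list, fisher_ops, lbd, X_batch):
    # Evaluate the Fisher estimates on a data batch and snapshot the current parameters.
    fisher_vals = sess.run(fisher_ops, feed_dict={'x_ph:0': X_batch})
    anchor_vals = sess.run(var_list)
    penalty = prev_ewc_loss  # accumulate penalties over all previous tasks
    for var, fish, theta_star in zip(var_list, fisher_vals, anchor_vals):
        penalty += (lbd / 2.0) * tf.reduce_sum(
            tf.constant(fish) * tf.square(var - tf.constant(theta_star)))
    return penalty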
def main(save_to='params', dataset = 'mm', kl_loss='true', # use kl-div in z-space instead of mse diffs = 'false', seq_length = 30, num_epochs=1, lstm_n_hid=1024, max_per_epoch=-1 ): kl_loss = kl_loss.lower() == 'true' diffs = diffs.lower() == 'true' # set up functions for data pre-processing and model training input_var = T.tensor4('inputs') # different experimental setup for moving mnist vs pulp fiction datasets if dataset == 'pf': img_size = 64 cae_weights = c.pf_cae_params cae_specstr = c.pf_cae_specstr split_layer = 'conv7' inpvar = T.tensor4('input') net = m.build_cae(inpvar, specstr=cae_specstr, shape=(img_size, img_size)) convs_from_img,_ = m.encoder_decoder(cae_weights, specstr=cae_specstr, layersplit=split_layer, shape=(img_size, img_size), poolinv=True) laydict = dict((l.name, l) for l in nn.layers.get_all_layers(net)) zdec_in_shape = nn.layers.get_output_shape(laydict[split_layer]) deconv_weights = c.pf_deconv_params vae_weights = c.pf_vae_params img_from_convs = m.deconvoluter(deconv_weights, specstr=cae_specstr, shape=zdec_in_shape) L=2 vae_n_hid = 1500 binary = False z_dim = 256 l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \ m.build_vae(input_var, L=L, binary=binary, z_dim=z_dim, n_hid=vae_n_hid, shape=(zdec_in_shape[2], zdec_in_shape[3]), channels=zdec_in_shape[1]) u.load_params(l_x, vae_weights) datafile = 'data/pf.hdf5' frame_skip=3 # every 3rd frame in sequence z_decode_layer = l_x_mu_list[0] pixel_shift = 0.5 samples_per_image = 4 tr_batch_size = 16 # must be a multiple of samples_per_image elif dataset == 'mm': img_size = 64 cvae_weights = c.mm_cvae_params L=2 vae_n_hid = 1024 binary = True z_dim = 32 zdec_in_shape = (None, 1, img_size, img_size) l_tup = l_z_mu, l_z_ls, l_x_mu_list, l_x_ls_list, l_x_list, l_x = \ m.build_vcae(input_var, L=L, z_dim=z_dim, n_hid=vae_n_hid, binary=binary, shape=(zdec_in_shape[2], zdec_in_shape[3]), channels=zdec_in_shape[1]) u.load_params(l_x, cvae_weights) datafile = 'data/moving_mnist.hdf5' frame_skip=1 w,h=img_size,img_size # width, height of raw input image in the hdf5 file z_decode_layer = l_x_list[0] pixel_shift = 0 samples_per_image = 1 tr_batch_size = 128 # must be a multiple of samples_per_image # functions for moving to/from image or conv-space, and z-space z_mat = T.matrix('z') zenc = theano.function([input_var], nn.layers.get_output(l_z_mu, deterministic=True)) zdec = theano.function([z_mat], nn.layers.get_output(z_decode_layer, {l_z_mu:z_mat}, deterministic=True).reshape((-1, zdec_in_shape[1]) + zdec_in_shape[2:])) zenc_ls = theano.function([input_var], nn.layers.get_output(l_z_ls, deterministic=True)) # functions for encoding sequences of z's print 'compiling functions' z_var = T.tensor3('z_in') z_ls_var = T.tensor3('z_ls_in') tgt_mu_var = T.tensor3('z_tgt') tgt_ls_var = T.tensor3('z_ls_tgt') learning_rate = theano.shared(nn.utils.floatX(1e-4)) # separate function definitions if we are using MSE and predicting only z, or KL divergence # and predicting both mean and sigma of z if kl_loss: def kl(p_mu, p_sigma, q_mu, q_sigma): return 0.5 * T.sum(T.sqr(p_sigma)/T.sqr(q_sigma) + T.sqr(q_mu - p_mu)/T.sqr(q_sigma) - 1 + 2*T.log(q_sigma) - 2*T.log(p_sigma)) lstm, _ = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid, training=True) z_mu_expr, z_ls_expr = nn.layers.get_output([lstm['output_mu'], lstm['output_ls']]) z_mu_expr_det, z_ls_expr_det = nn.layers.get_output([lstm['output_mu'], lstm['output_ls']], deterministic=True) loss = kl(tgt_mu_var, T.exp(tgt_ls_var), z_mu_expr, T.exp(z_ls_expr)) te_loss = kl(tgt_mu_var,
T.exp(tgt_ls_var), z_mu_expr_det, T.exp(z_ls_expr_det)) params = nn.layers.get_all_params(lstm['output'], trainable=True) updates = nn.updates.adam(loss, params, learning_rate=learning_rate) train_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], loss, updates=updates) test_fn = theano.function([z_var, z_ls_var, tgt_mu_var, tgt_ls_var], te_loss) else: lstm, _ = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid, training=True) loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output']), tgt_mu_var).mean() te_loss = nn.objectives.squared_error(nn.layers.get_output(lstm['output'], deterministic=True), tgt_mu_var).mean() params = nn.layers.get_all_params(lstm['output'], trainable=True) updates = nn.updates.adam(loss, params, learning_rate=learning_rate) train_fn = theano.function([z_var, tgt_mu_var], loss, updates=updates) test_fn = theano.function([z_var, tgt_mu_var], te_loss) if dataset == 'pf': z_from_img = lambda x: zenc(convs_from_img(x)) z_ls_from_img = lambda x: zenc_ls(convs_from_img(x)) img_from_z = lambda z: img_from_convs(zdec(z)) elif dataset == 'mm': z_from_img = zenc z_ls_from_img = zenc_ls img_from_z = zdec # training loop print('training for {} epochs'.format(num_epochs)) nbatch = (seq_length+1) * tr_batch_size * frame_skip / samples_per_image data = u.DataH5PyStreamer(datafile, batch_size=nbatch) # for taking arrays of uint8 (non square) and converting them to batches of sequences def transform_data(ims_batch, center=False): imb = u.raw_to_floatX(ims_batch, pixel_shift=pixel_shift, center=center)[np.random.randint(frame_skip)::frame_skip] zbatch = np.zeros((tr_batch_size, seq_length+1, z_dim), dtype=theano.config.floatX) zsigbatch = np.zeros((tr_batch_size, seq_length+1, z_dim), dtype=theano.config.floatX) for i in xrange(samples_per_image): chunk = tr_batch_size/samples_per_image if diffs: zf = z_from_img(imb).reshape((chunk, seq_length+1, -1)) zbatch[i*chunk:(i+1)*chunk, 1:] = zf[:,1:] - zf[:,:-1] if kl_loss: zls = z_ls_from_img(imb).reshape((chunk, seq_length+1, -1)) zsigbatch[i*chunk:(i+1)*chunk, 1:] = zls[:,1:] - zls[:,:-1] else: zbatch[i*chunk:(i+1)*chunk] = z_from_img(imb).reshape((chunk, seq_length+1, -1)) if kl_loss: zsigbatch[i*chunk:(i+1)*chunk] = z_ls_from_img(imb).reshape((chunk, seq_length+1, -1)) if kl_loss: return zbatch[:,:-1,:], zsigbatch[:,:-1,:], zbatch[:,1:,:], zsigbatch[:,1:,:] return zbatch[:,:-1,:], zbatch[:,1:,:] # we need sequences of images, so we do not shuffle data during training hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn, train_shuffle=False, max_per_epoch=max_per_epoch, tr_transform=lambda x: transform_data(x[0], center=False), te_transform=lambda x: transform_data(x[0], center=True)) hist = np.asarray(hist) u.save_params(lstm['output'], os.path.join(save_to, 'lstm_{}.npz'.format(hist[-1,-1]))) # build functions to sample from LSTM # separate cell_init and hid_init from the other learned model parameters all_param_values = nn.layers.get_all_param_values(lstm['output']) init_indices = [i for i,p in enumerate(nn.layers.get_all_params(lstm['output'])) if 'init' in str(p)] init_values = [all_param_values[i] for i in init_indices] params_noinit = [p for i,p in enumerate(all_param_values) if i not in init_indices] # build model without learnable init values, and load non-init parameters if kl_loss: lstm_sample, state_vars = m.Z_VLSTM(z_var, z_ls_var, z_dim=z_dim, nhid=lstm_n_hid, training=False) else: lstm_sample, state_vars = m.Z_LSTM(z_var, z_dim=z_dim, nhid=lstm_n_hid,
training=False) nn.layers.set_all_param_values(lstm_sample['output'], params_noinit) # extract layers representing the hidden and cell states, and have sample_fn # return their outputs state_layers_keys = [k for k in lstm_sample.keys() if 'hidfinal' in k or 'cellfinal' in k] state_layers_keys = sorted(state_layers_keys) state_layers_keys = sorted(state_layers_keys, key = lambda x:int(x.split('_')[1])) state_layers = [lstm_sample[s] for s in state_layers_keys] if kl_loss: sample_fn = theano.function([z_var, z_ls_var] + state_vars, nn.layers.get_output([lstm['output_mu'], lstm['output_ls']] + state_layers, deterministic=True)) else: sample_fn = theano.function([z_var] + state_vars, nn.layers.get_output([lstm['output']] + state_layers, deterministic=True)) from images2gif import writeGif from PIL import Image # sample approximately 30 different generated video sequences te_stream = data.streamer(training=True, shuffled=False) interval = data.ntrain / data.batch_size / 30 for idx,imb in enumerate(te_stream.get_epoch_iterator()): if idx % interval != 0: continue z_tup = transform_data(imb[0], center=True) seg_idx = np.random.randint(z_tup[0].shape[0]) if kl_loss: z_in, z_ls_in = z_tup[0], z_tup[1] z_last, z_ls_last = z_in[seg_idx:seg_idx+1], z_ls_in[seg_idx:seg_idx+1] z_vars = [z_last, z_ls_last] else: z_in = z_tup[0] z_last = z_in[seg_idx:seg_idx+1] z_vars = [z_last] images = [] state_values = [np.dot(np.ones((z_last.shape[0],1), dtype=theano.config.floatX), s) for s in init_values] output_list = sample_fn(*(z_vars + state_values)) # use whole sequence of predictions for output z_pred = output_list[0] state_values = output_list[2 if kl_loss else 1:] rec = img_from_z(z_pred.reshape(-1, z_dim)) for k in xrange(rec.shape[0]): images.append(Image.fromarray(u.get_picture_array(rec, index=k, shift=pixel_shift))) k += 1 # slice prediction to feed into lstm z_pred = z_pred[:,-1:,:] if kl_loss: z_ls_pred = output_list[1][:,-1:,:] z_vars = [z_pred, z_ls_pred] else: z_vars = [z_pred] for i in xrange(30): # predict 30 frames after the end of the priming video output_list = sample_fn(*(z_vars + state_values)) z_pred = output_list[0] state_values = output_list[2 if kl_loss else 1:] rec = img_from_z(z_pred.reshape(-1, z_dim)) images.append(Image.fromarray(u.get_picture_array(rec, index=0, shift=pixel_shift))) if kl_loss: z_ls_pred = output_list[1] z_vars = [z_pred, z_ls_pred] else: z_vars = [z_pred] writeGif("sample_{}.gif".format(idx),images,duration=0.1,dither=0)
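# For reference, the kl helper defined in the kl_loss branch of main above is the summed
# closed-form KL divergence between diagonal Gaussians p = N(p_mu, diag(p_sigma^2)) and
# q = N(q_mu, diag(q_sigma^2)); in LaTeX:
#   D_{KL}(p \| q) = \frac{1}{2} \sum_i \left[ \frac{\sigma_{p,i}^2}{\sigma_{q,i}^2}
#                    + \frac{(\mu_{q,i} - \mu_{p,i})^2}{\sigma_{q,i}^2}
#                    - 1 + 2\log\sigma_{q,i} - 2\log\sigma_{p,i} \right]
# In the training loss it is applied as kl(target_mu, exp(target_logsigma), pred_mu,
# exp(pred_logsigma)), i.e. the KL from the target latent distribution to the LSTM's
# predicted distribution, summed (not averaged) over the batch and latent dimensions.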
def main(n_hid=256, lstm_layers=2, num_epochs=100, batch_size=32, save_to='output', max_per_epoch=-1): # load current set of words used words = open(c.words_used_file, 'r').readlines() idx_to_words = dict((i + 1, w.strip()) for i, w in enumerate(words)) idx_to_words[0] = '<e>' word_dim = len(words) + 1 # normalization expected by vgg-net mean_values = np.array([104, 117, 123]).reshape( (3, 1, 1)).astype(theano.config.floatX) # build function for extracting convolutional features img_var = T.tensor4('images') net = m.build_vgg(shape=(c.img_size, c.img_size), input_var=img_var) values = pickle.load(open(c.vgg_weights))['param values'] nn.layers.set_all_param_values(net['pool5'], values) conv_feats = theano.function([img_var], nn.layers.get_output(net['pool5'])) conv_shape = nn.layers.get_output_shape(net['pool5']) # helper function for converting word vector to one-hot raw_word_var = T.matrix('seq_raw') one_hot = theano.function([raw_word_var], nn.utils.one_hot(raw_word_var, m=word_dim)) # build expressions for lstm conv_feats_var = T.tensor4('conv') seq_var = T.tensor3('seq') lstm = m.build_rnn(conv_feats_var, seq_var, conv_shape, word_dim, n_hid, lstm_layers) output = nn.layers.get_output(lstm['output']) output_det = nn.layers.get_output(lstm['output'], deterministic=True) loss = m.categorical_crossentropy_logdomain(output, seq_var).mean() te_loss = m.categorical_crossentropy_logdomain(output_det, seq_var).mean() # compile training functions params = nn.layers.get_all_params(lstm['output'], trainable=True) lr = theano.shared(nn.utils.floatX(1e-3)) updates = nn.updates.adam(loss, params, learning_rate=lr) train_fn = theano.function([conv_feats_var, seq_var], loss, updates=updates) test_fn = theano.function([conv_feats_var, seq_var], te_loss) predict_fn = theano.function([conv_feats_var, seq_var], T.exp(output_det[:, -1:])) zeros = np.zeros((batch_size, 1, word_dim), dtype=theano.config.floatX) def transform_data(imb): y, x = imb # data augmentation: flip = -1 if we do flip over y-axis, 1 if not flip = -2 * np.random.binomial(1, p=0.5) + 1 # this vgg-net expects image values that are normalized by mean but not magnitude x = (u.raw_to_floatX(x[:,:,::flip], pixel_shift=0.)\ .transpose(0,1,3,2)[:,::-1] * 255. - mean_values) return conv_feats(x), np.concatenate([zeros, one_hot(y)], axis=1) data = u.DataH5PyStreamer(c.twimg_hdf5_file, batch_size=batch_size) hist = u.train_with_hdf5(data, num_epochs=num_epochs, train_fn=train_fn, test_fn=test_fn, max_per_epoch=max_per_epoch, tr_transform=transform_data, te_transform=transform_data) np.savetxt('lstm_train_hist.csv', np.asarray(hist), delimiter=',', fmt='%.5f') u.save_params( lstm['output'], os.path.join(save_to, 'lstm_{}.npz'.format(np.asarray(hist)[-1, -1]))) # generate some example captions for one batch of images streamer = data.streamer(training=False, shuffled=True) y_raw, x_raw = next(streamer.get_epoch_iterator()) x, _ = transform_data((y_raw, x_raw)) y = zeros captions = [] for idx in xrange(y.shape[0]): captions.append([]) idx_to_words[0] = '<e>' for sample_num in xrange(c.max_caption_len): pred = predict_fn(x, y) new_y = [] for idx in xrange(pred.shape[0]): # reduce size by a small factor to prevent numerical imprecision from # making it sum to > 1.
# reverse it so that <e> gets the additional probability, not a word sample = np.random.multinomial(1, pred[idx, 0, ::-1] * .999999)[::-1] captions[idx].append(idx_to_words[np.argmax(sample)]) new_y.append(sample) new_y = np.vstack(new_y).reshape(-1, 1, word_dim).astype(theano.config.floatX) y = np.concatenate([y, new_y], axis=1) captions = [ '{},{}\n'.format(i, ' '.join(cap)) for i, cap in enumerate(captions) ] with open(os.path.join(save_to, 'captions_sample.csv'), 'w') as wr: wr.writelines(captions) for idx in xrange(x_raw.shape[0]): Image.fromarray(x_raw[idx].transpose(2, 1, 0)).save( os.path.join(save_to, 'ex_{}.jpg'.format(idx)))
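# m.categorical_crossentropy_logdomain, used for the captioning loss above, comes from the
# project's model module and is not shown here. A plausible one-line sketch, assuming the
# RNN emits log-probabilities over the vocabulary and seq_var holds one-hot targets (both
# assumptions for illustration; the helper name below is hypothetical).
import theano.tensor as T


def categorical_crossentropy_logdomain_sketch(log_predictions, targets):
    # Cross-entropy when the predictions are already in the log domain: the negative sum of
    # target-weighted log-probabilities over the last (word) dimension, per batch/time step.
    return -T.sum(targets * log_predictions, axis=log_predictions.ndim - 1)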
def main( data_file="", img_size=64, num_epochs=10, batch_size=128, pxsh=0.5, split_layer="conv7", specstr=c.pf_cae_specstr, cae_params=c.pf_cae_params, save_to="params", ): # transform function to go from images -> conv feats conv_feats, _ = m.encoder_decoder(cae_params, specstr=specstr, layersplit=split_layer, shape=(img_size, img_size)) # build pretrained net for images -> convfeats in order to get the input shape # for the reverse function print("compiling functions") conv_net = m.build_cae(input_var=None, specstr=specstr, shape=(img_size, img_size)) cae_layer_dict = dict((l.name, l) for l in nn.layers.get_all_layers(conv_net)) shape = nn.layers.get_output_shape(cae_layer_dict[split_layer]) # build net for convfeats -> images imgs_var = T.tensor4("images") convs_var = T.tensor4("conv_features") deconv_net = m.build_deconv_net(input_var=convs_var, shape=shape, specstr=specstr) loss = nn.objectives.squared_error(imgs_var, nn.layers.get_output(deconv_net)).mean() te_loss = nn.objectives.squared_error(imgs_var, nn.layers.get_output(deconv_net, deterministic=True)).mean() params = nn.layers.get_all_params(deconv_net, trainable=True) lr = theano.shared(nn.utils.floatX(3e-3)) updates = nn.updates.adam(loss, params, learning_rate=lr) # compile functions train_fn = theano.function([convs_var, imgs_var], loss, updates=updates) val_fn = theano.function([convs_var, imgs_var], te_loss) deconv_fn = theano.function([convs_var], nn.layers.get_output(deconv_net, deterministic=True)) # run training loop print("training for {} epochs".format(num_epochs)) def data_transform(x, do_center): floatx_ims = u.raw_to_floatX(x, pixel_shift=pxsh, square=True, center=do_center) return (conv_feats(floatx_ims), floatx_ims) data = u.DataH5PyStreamer(data_file, batch_size=batch_size) hist = u.train_with_hdf5( data, num_epochs=num_epochs, train_fn=train_fn, test_fn=val_fn, tr_transform=lambda x: data_transform(x[0], do_center=False), te_transform=lambda x: data_transform(x[0], do_center=True), ) # generate examples, save training history and params te_stream = data.streamer(shuffled=True) imb, = next(te_stream.get_epoch_iterator()) imb = data_transform(imb, True)[0] result = deconv_fn(imb) for i in range(result.shape[0]): Image.fromarray(u.get_picture_array(result, index=i, shift=pxsh)).save("output_{}.jpg".format(i)) hist = np.asarray(hist) np.savetxt("deconv_train_hist.csv", np.asarray(hist), delimiter=",", fmt="%.5f") u.save_params(deconv_net, os.path.join(save_to, "deconv_{}.npz".format(hist[-1, -1])))