# Trainer for Bayesian neural-network regression with SVGD, KFAC, and (p)SGLD
# updates; reports test RMSE and predictive log-likelihood.
class Trainer(object):

    def optimize_sgd(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adagrad(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        # RMSProp: adaptive gradients with an exponentially decayed accumulator
        optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.95)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adam(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        assert (loss is not None) or (train_grads is not None), 'illegal inputs'
        optimizer = tf.train.AdamOptimizer(lr)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(loss,
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def __init__(self, config, dataset, session):
        self.config = config
        self.session = session
        self.dataset = dataset

        self.filepath = '%s-%s' % (config.method, config.dataset)
        self.train_dir = './train_dir/%s' % self.filepath

        for folder in [self.train_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # clean train folder
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        # --- create model ---
        self.model = SVGD(config)

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            decay_step = int(0.1 * self.config.n_epoches * self.config.n_train
                             // self.config.batch_size)
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=decay_step,
                decay_rate=0.8,
                staircase=True,
                name='decaying_learning_rate')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.checkpoint_secs = 300  # 5 min

        if self.config.method == 'svgd':
            self.train_op = self.optimize_adagrad(
                self.model.train_vars,
                train_grads=self.model.svgd_grads,
                lr=self.learning_rate)
        elif self.config.method in ['svgd_kfac', 'map_kfac', 'mixture_kfac']:
            self.inc_op = self.model.inc_ops
            self.inv_op = self.model.inv_ops
            if self.config.method == 'svgd_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.svgd_kfac_grads,
                    lr=self.learning_rate)
            elif self.config.method == 'map_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.map_kfac_grads,
                    lr=self.learning_rate)
            elif self.config.method == 'mixture_kfac':
                self.train_op = self.optimize_adagrad(
                    self.model.train_vars,
                    train_grads=self.model.mixture_kfac_grads,
                    lr=self.learning_rate)
        elif self.config.method in ['SGLD', 'pSGLD']:
            self.train_op = self.optimize_sgd(
                self.model.train_vars,
                train_grads=self.model.psgld_grads,
                lr=1.0)

        tf.global_variables_initializer().run()

        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")

    def evaluate(self):
        dev_set = {
            'X': self.dataset.x_train[:1000],
            'y': self.dataset.y_train[:1000],
        }
        test_set = {
            'X': self.dataset.x_test,
            'y': self.dataset.y_test,
        }

        # estimate the observation-noise variance from the dev-set error,
        # then score the test set under a Gaussian predictive likelihood
        pred_y_dev = self.session.run(self.model.y_pred,
                                      self.model.get_feed_dict(dev_set))
        pred_y_dev = pred_y_dev * self.dataset.std_y_train + self.dataset.mean_y_train
        y_dev = dev_set['y'] * self.dataset.std_y_train + self.dataset.mean_y_train
        neg_log_var = -np.log(np.mean((pred_y_dev - y_dev) ** 2))

        y_test = test_set['y']
        pred_y_test = self.session.run(self.model.y_pred,
                                       self.model.get_feed_dict(test_set))
        pred_y_test = pred_y_test * self.dataset.std_y_train + self.dataset.mean_y_train

        prob = np.sqrt(np.exp(neg_log_var) / (2 * np.pi)) * np.exp(
            -0.5 * (pred_y_test - np.expand_dims(y_test, 0)) ** 2 * np.exp(neg_log_var))
        rmse = np.sqrt(np.mean((y_test - np.mean(pred_y_test, 0)) ** 2))
        ll = np.mean(np.log(np.mean(prob, axis=0)))
        return rmse, ll, neg_log_var

    def train(self):
        log.infov("Training Starts!")
        self.session.run(self.global_step.assign(0))  # reset global step

        n_updates = 1
        x_train, y_train = self.dataset.x_train, self.dataset.y_train
        for ep in xrange(1, 1 + self.config.n_epoches):
            x_train, y_train = shuffle(x_train, y_train)
            # note: a trailing partial batch is skipped
            max_batches = self.config.n_train // self.config.batch_size
            for bi in xrange(max_batches):
                start = bi * self.config.batch_size
                end = min((bi + 1) * self.config.batch_size, self.config.n_train)
                batch_chunk = {'X': x_train[start:end], 'y': y_train[start:end]}

                step, summary, log_prob, step_time = \
                    self.run_single_step(n_updates, batch_chunk)
                if np.any(np.isnan(log_prob)):
                    sys.exit(1)
                self.summary_writer.add_summary(summary, global_step=step)
                n_updates += 1

            if ep % (self.config.n_epoches // 10 + 1) == 0:
                rmse, ll, neg_log_var = self.evaluate()
                print(ep, neg_log_var, rmse, ll)

        test_rmse, test_ll, _ = self.evaluate()
        write_time = time.strftime("%m-%d-%H:%M:%S")
        pardir = "%s_%s/" % (self.config.method, repr(self.config.learning_rate))
        if not os.path.exists(self.config.savepath + pardir):
            os.makedirs(self.config.savepath + pardir)

        fm = 'w' if self.config.trial == 1 else 'a'
        with open(self.config.savepath + pardir + self.config.dataset +
                  "_test_ll_rmse_%s.txt" % (self.filepath), fm) as f:
            f.write(repr(self.config.trial) + ',' + write_time + ',' +
                    repr(self.config.n_epoches) + ',' + repr(test_rmse) + ',' +
                    repr(test_ll) + '\n')

    def run_single_step(self, step, batch_chunk):
        _start_time = time.time()

        fetch = [self.global_step, self.summary_op, self.model.log_prob]
        if self.config.method in ['svgd_kfac', 'map_kfac', 'mixture_kfac']:
            fetch += [self.inc_op]
            if step % self.config.inverse_update_freq == 0:
                fetch += [self.inv_op]
        if self.config.method == 'pSGLD':
            fetch += [self.model.moment_op]
        fetch += [self.train_op]

        fetch_values = self.session.run(
            fetch, feed_dict=self.model.get_feed_dict(batch_chunk, step))
        [step, summary, log_prob] = fetch_values[:3]

        _end_time = time.time()
        return step, summary, np.sum(log_prob), (_end_time - _start_time)

    def log_step_message(self, step, log_prob, step_time, is_train=True):
        if step_time == 0:
            step_time = 0.001
        log_fn = (is_train and log.info or log.infov)
        log_fn((" [{split_mode:5s} step {step:4d}] " +
                "log_prob: {log_prob:.4f} " +
                "({sec_per_batch:.3f} sec/batch)").format(
                    split_mode=(is_train and 'train' or 'val'),
                    step=step,
                    log_prob=log_prob,
                    sec_per_batch=step_time,
                ))
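# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): how `evaluate` above
# turns per-particle predictions into a test log-likelihood. The function name,
# shapes, and the synthetic arrays in the usage comment are assumptions chosen
# to mirror the formula; only NumPy is required.
# ---------------------------------------------------------------------------
import numpy as np


def gaussian_test_log_likelihood(pred_y_test, y_test, neg_log_var):
    """Average the Gaussian predictive density over particles, then take log.

    pred_y_test : (n_particles, n_test) per-particle predictions
    y_test      : (n_test,) targets
    neg_log_var : scalar, -log of the estimated observation-noise variance
    """
    prec = np.exp(neg_log_var)  # precision = 1 / sigma^2
    prob = np.sqrt(prec / (2 * np.pi)) * np.exp(
        -0.5 * prec * (pred_y_test - y_test[None, :]) ** 2)
    rmse = np.sqrt(np.mean((y_test - pred_y_test.mean(axis=0)) ** 2))
    ll = np.mean(np.log(prob.mean(axis=0)))
    return rmse, ll

# example usage (hypothetical data):
#   y = np.random.randn(50)
#   preds = y[None, :] + 0.1 * np.random.randn(20, 50)   # 20 particles
#   rmse, ll = gaussian_test_log_likelihood(preds, y, neg_log_var=-np.log(0.01))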
# Trainer for SVGD-based deep clustering on MNIST: autoencoder pre-training
# followed by joint DEPICT / SVGD mixture updates in the latent space.
class Trainer(object):

    def optimize_adagrad(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        # RMSProp with gradient clipping
        optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.9)
        if train_grads is not None:
            clip_grads = [(tf.clip_by_norm(grad, 20), var)
                          for grad, var in zip(train_grads, train_vars)]
            train_op = optimizer.apply_gradients(clip_grads)
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adam(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        assert (loss is not None) or (train_grads is not None), 'illegal inputs'
        optimizer = tf.train.AdamOptimizer(lr)
        if train_grads is not None:
            clip_grads = [(tf.clip_by_norm(grad, 20), var)
                          for grad, var in zip(train_grads, train_vars)]
            train_op = optimizer.apply_gradients(clip_grads)
        else:
            train_op = optimizer.minimize(loss,
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def __init__(self, config, session):
        self.config = config
        self.session = session

        if self.config.method == 'svgd':
            self.filepath = '%s_%s_%s_%s_%d' % (config.method, config.dataset,
                                                config.kernel,
                                                repr(config.temperature),
                                                config.seed)
        else:
            self.filepath = '%s_%s' % (config.method, config.dataset)

        self.res_dir = './results/%s/' % self.filepath
        self.fig_dir = './results/%s/figures' % self.filepath
        self.res_gmm_dir = './results/%s/gmm' % self.filepath
        self.res_pretrain_dir = './results/%s_%s_pretrain' % (
            self.config.method, self.config.dataset)

        for folder in [self.res_dir, self.fig_dir, self.res_gmm_dir,
                       self.res_pretrain_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)

        if self.config.method == 'svgd':
            from model_svgd import SVGD
            self.model = SVGD(config)
        else:
            raise NotImplementedError

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.res_dir)
        self.checkpoint_secs = 300  # 5 min

        # pre-training op: denoising autoencoder reconstruction
        self.pre_train_op = self.optimize_adam(self.model.net_train_vars,
                                               loss=self.model.loss_recons_noisy,
                                               lr=self.config.learning_rate)

        if self.config.method == 'svgd':
            self.depict_op = self.optimize_adam(self.model.train_vars,
                                                loss=self.model.depict_loss,
                                                lr=self.learning_rate)
            self.svgd_op = self.optimize_adam(self.model.gmm_train_vars,
                                              train_grads=self.model.gmm_train_grads,
                                              lr=self.config.learning_rate)

        tf.global_variables_initializer().run()

    def iterate_minibatches(self, inputs, targets, batchsize, shuffle=False):
        if shuffle:
            indices = np.arange(len(inputs))
            np.random.shuffle(indices)
        max_batches = len(inputs) // batchsize
        if len(inputs) % batchsize != 0:
            max_batches += 1
        for i in range(max_batches):
            start_idx = i * batchsize
            end_idx = min(len(inputs), (i + 1) * batchsize)
            if shuffle:
                excerpt = indices[start_idx:end_idx]
            else:
                excerpt = slice(start_idx, end_idx)
            yield inputs[excerpt], targets[excerpt], excerpt

    def try_load_checkpoint(self, model_path):
        ckpt_path = tf.train.latest_checkpoint(model_path)
        assert ckpt_path is not None, '%s is empty' % model_path
        log.info("Checkpoint path: %s", ckpt_path)
        self.saver.restore(self.session, ckpt_path)
        log.info("Loaded the pretrained parameters from the provided checkpoint path")

    def save_curr_model(self, model_path):
        step = self.session.run(self.global_step)
        self.saver.save(self.session, model_path, global_step=step)

    def get_latent_rep_and_pred(self, inputs, targets, batch_size=100):
        y_pred, latent_z = [], []
        for batch in self.iterate_minibatches(inputs, targets, batch_size,
                                              shuffle=False):
            x_batch, _, _ = batch
            pred, z = self.session.run(
                [self.model.pred_clean, self.model.z],
                feed_dict=self.model.get_feed_dict(x_batch))
            y_pred.append(pred)
            latent_z.append(z)
        y_pred = np.concatenate(y_pred, axis=0)
        latent_z = np.concatenate(latent_z, axis=0)
        return latent_z, np.argmax(y_pred, 1)

    def pre_train_enc_dec(self, inputs, targets, num_epochs=1000, batch_size=100):
        '''Pre-train the autoencoder on the reconstruction loss.'''
        for epoch in range(1, num_epochs + 1):
            train_err = 0
            for batch in self.iterate_minibatches(inputs, targets, batch_size,
                                                  shuffle=True):
                x_batch, _, _ = batch
                err, _ = self.session.run(
                    [self.model.loss_recons_clean, self.pre_train_op],
                    feed_dict=self.model.get_feed_dict(x_batch))
                train_err += err

            log.info("pre-training autoencoder epoch: {:d}, loss: {:4f}".format(
                epoch, train_err))

            if epoch % (num_epochs // 10) == 0:
                # save the model and report clustering quality in latent space
                self.save_curr_model(os.path.join(self.res_pretrain_dir, 'model'))
                latent_z, _ = self.get_latent_rep_and_pred(inputs, targets)
                y_pred, _ = clustering(latent_z, self.config.num_clusters)
                metrics(targets, y_pred)

    def train_svgd(self, inputs, targets, batch_size=100, num_epochs=4000):

        def normalize(y_prob):
            # DEPICT-style target distribution: sharpen the soft assignments
            # and re-balance by cluster frequency
            cluster_frequency = np.sum(y_prob, axis=0)
            y_prob = y_prob ** 2 / cluster_frequency
            y_prob = np.transpose(y_prob.T / np.sum(y_prob, axis=1))
            y_pred = np.argmax(y_prob, axis=1)
            return y_prob, y_pred

        n_train = len(inputs)
        y_prob = np.zeros((n_train, self.config.num_clusters))
        y_prob_prev = np.zeros((n_train, self.config.num_clusters))

        # initialize the soft assignments from the current model
        for batch in self.iterate_minibatches(inputs, targets, batch_size,
                                              shuffle=False):
            x_batch, _, idx_batch = batch
            minibatch_prob = self.session.run(
                self.model.pred_clean,
                feed_dict=self.model.get_feed_dict(x_batch))
            y_prob[idx_batch] = minibatch_prob
        y_prob, y_pred = normalize(y_prob)

        n_updates = 0
        for epoch in range(1, num_epochs + 1):
            recon_loss_iter, clus_loss_iter, loss_iter, energy_iter = 0., 0., 0., 0.
            for batch in self.iterate_minibatches(inputs, targets, batch_size,
                                                  shuffle=True):
                x_batch, _, idx_batch = batch

                fetch_values = [
                    self.model.loss, self.model.loss_recons_noisy,
                    self.model.loss_clus, self.model.energy_noisy,
                    self.summary_op, self.depict_op
                ]
                if epoch > 20:
                    fetch_values.append(self.svgd_op)

                ret = self.session.run(
                    fetch_values,
                    feed_dict=self.model.get_feed_dict(x_batch, y_prob[idx_batch]))
                loss, loss_recons, loss_clus, energy, summary = ret[:5]

                # refresh the soft assignments for this minibatch
                minibatch_prob = self.session.run(
                    self.model.pred_clean,
                    feed_dict=self.model.get_feed_dict(x_batch))
                y_prob[idx_batch] = minibatch_prob

                loss_iter += loss
                recon_loss_iter += loss_recons
                clus_loss_iter += loss_clus
                energy_iter += energy

                self.summary_writer.add_summary(summary, global_step=n_updates)
                n_updates += 1

            print(epoch, 'recon_loss', recon_loss_iter, 'clus_loss', clus_loss_iter,
                  'loss', loss_iter, 'energy', energy_iter)
            print(epoch, metrics(targets, y_pred))

            y_prob, y_pred = normalize(y_prob)
            # stop once the hard assignments no longer change
            if np.sum((y_pred - np.argmax(y_prob_prev, axis=1)) ** 2) < 1e-6:
                break
            y_prob_prev = np.copy(y_prob)

            if epoch % 10 == 0:
                latent_z, y_pred = self.get_latent_rep_and_pred(inputs, targets)
                plot_latent_z_space(latent_z, y_pred,
                                    '%s/step-%d.png' % (self.res_dir, epoch))
                print(epoch, metrics(targets, y_pred))

    def train(self):
        log.infov("Training Starts!")
        self.session.run(self.global_step.assign(0))  # reset global step

        if self.config.dataset == 'mnist':
            from load import load_mnist
            inputs, targets = load_mnist()
        else:
            raise NotImplementedError

        if self.config.method == 'kmeans':
            # k-means baseline directly on the flattened pixels
            y_pred, _ = clustering(np.reshape(inputs, (len(inputs), -1)),
                                   self.config.num_clusters)
            metrics(targets, y_pred)
            return

        # --- pre-training ---
        if not self.config.skip_pretrain:
            self.pre_train_enc_dec(inputs, targets,
                                   batch_size=self.config.batch_size,
                                   num_epochs=1000)
            # save model
            self.save_curr_model(os.path.join(self.res_pretrain_dir, 'model'))
        else:
            self.try_load_checkpoint(self.res_pretrain_dir)

        # plot the pre-trained latent space
        latent_z, _ = self.get_latent_rep_and_pred(inputs, targets)
        y_pred, centroids = clustering(latent_z, self.config.num_clusters)
        plot_latent_z_space(latent_z, y_pred,
                            '%s/pre_train_z' % self.res_dir, with_legend=True)

        if self.config.method == 'svgd':
            if not self.config.skip_svgd:
                # initialize the mixture components from the k-means solution
                self.session.run(self.model.mu.assign(centroids))
                scale = np.zeros((self.config.num_clusters, self.config.z_dim))
                for c in range(self.config.num_clusters):
                    z_c = latent_z[np.where(y_pred == c)[0]]
                    scale[c] = np.std(z_c, axis=0)
                self.session.run(self.model.scale_diag.assign(scale))

                self.train_svgd(inputs, targets,
                                num_epochs=400,
                                batch_size=self.config.batch_size)
                self.save_curr_model(os.path.join(self.res_dir, 'model'))
            else:
                self.try_load_checkpoint(self.res_dir)

            # plot the final latent space
            latent_z, y_pred = self.get_latent_rep_and_pred(inputs, targets)
            plot_latent_z_space(latent_z, y_pred,
                                '%s/%s_z' % (self.res_dir, self.config.method),
                                with_legend=True)
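# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): the target-distribution
# update performed by `normalize` inside `train_svgd` above. The function name
# and the example matrix in the usage comment are assumptions; only NumPy is
# required.
# ---------------------------------------------------------------------------
import numpy as np


def sharpen_assignments(y_prob):
    """Square the soft cluster assignments, reweight by cluster frequency,
    and renormalize each row (DEPICT-style target distribution)."""
    cluster_frequency = y_prob.sum(axis=0)        # (num_clusters,)
    target = y_prob ** 2 / cluster_frequency      # emphasize confident rows
    target = target / target.sum(axis=1, keepdims=True)
    return target, target.argmax(axis=1)

# example usage (hypothetical assignments, 4 samples x 3 clusters):
#   q, hard = sharpen_assignments(np.array([[0.6, 0.3, 0.1],
#                                           [0.4, 0.4, 0.2],
#                                           [0.1, 0.8, 0.1],
#                                           [0.2, 0.2, 0.6]]))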
# Trainer for classification with SVGD, KFAC, and (p)SGLD updates; reports
# log-likelihood and accuracy on the train/valid/test splits.
class Trainer(object):

    def optimize_sgd(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adagrad(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        # RMSProp: adaptive gradients with an exponentially decayed accumulator
        optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.9)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(tf.reduce_mean(loss),
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def optimize_adam(self, train_vars, loss=None, train_grads=None, lr=1e-2):
        assert (loss is not None) or (train_grads is not None), 'illegal inputs'
        optimizer = tf.train.AdamOptimizer(lr)
        if train_grads is not None:
            train_op = optimizer.apply_gradients(zip(train_grads, train_vars))
        else:
            train_op = optimizer.minimize(loss,
                                          var_list=train_vars,
                                          global_step=self.global_step)
        return train_op

    def __init__(self, config, dataset, session):
        self.config = config
        self.session = session
        self.dataset = dataset

        self.filepath = '%s' % (config.method,)
        self.train_dir = './train_dir/%s' % self.filepath

        for folder in [self.train_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # clean train folder
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        # --- create model ---
        self.model = SVGD(config)

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.checkpoint_secs = 300  # 5 min

        if self.config.method == 'svgd':
            self.train_op = self.optimize_adagrad(
                self.model.train_vars,
                train_grads=self.model.svgd_grads,
                lr=self.learning_rate)
        elif self.config.method == 'svgd_kfac':
            self.train_op = self.optimize_adagrad(
                self.model.train_vars,
                train_grads=self.model.kfac_grads,
                lr=self.learning_rate)
        elif self.config.method == 'mixture_kfac':
            self.train_op = self.optimize_adagrad(
                self.model.train_vars,
                train_grads=self.model.mixture_grads,
                lr=self.learning_rate)
        elif self.config.method in ['SGLD', 'pSGLD']:
            self.train_op = self.optimize_sgd(
                self.model.train_vars,
                train_grads=self.model.psgld_grads,
                lr=1.0)

        tf.global_variables_initializer().run()

        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")

    def evaluate(self, step):

        def get_lik_and_acc(X, y):
            n = len(X)
            ll, acc = [], []
            batch_size = 2000
            # round up so the last partial batch is scored, without an empty batch
            for i in range((n + batch_size - 1) // batch_size):
                start = i * batch_size
                end = min((i + 1) * batch_size, n)
                batch = {'X': X[start:end], 'y': y[start:end]}
                ll_i, acc_i = self.session.run(
                    [self.model.ll, self.model.accuracy],
                    feed_dict=self.model.get_feed_dict(batch, step))
                ll.append(ll_i)
                acc.append(acc_i)
            return np.mean(ll), np.mean(acc)

        train_ll, train_acc = get_lik_and_acc(self.dataset.x_train, self.dataset.y_train)
        valid_ll, valid_acc = get_lik_and_acc(self.dataset.x_valid, self.dataset.y_valid)
        test_ll, test_acc = get_lik_and_acc(self.dataset.x_test, self.dataset.y_test)
        return train_ll, train_acc, valid_ll, valid_acc, test_ll, test_acc

    def train(self):
        log.infov("Training Starts!")
        self.session.run(self.global_step.assign(0))  # reset global step

        n_updates = 1
        for ep in xrange(1, 1 + self.config.n_epoches):
            x_train, y_train = shuffle(self.dataset.x_train, self.dataset.y_train)
            # note: a trailing partial batch is skipped
            max_batches = self.config.n_train // self.config.batch_size
            for bi in xrange(max_batches):
                start = bi * self.config.batch_size
                end = min((bi + 1) * self.config.batch_size, self.config.n_train)
                batch_chunk = {'X': x_train[start:end], 'y': y_train[start:end]}

                step, summary, log_prob, step_time = \
                    self.run_single_step(n_updates, batch_chunk)
                self.summary_writer.add_summary(summary, global_step=step)

                if n_updates % 50 == 0:
                    print(n_updates, self.evaluate(n_updates))
                n_updates += 1

    def run_single_step(self, step, batch_chunk):
        _start_time = time.time()

        fetch = [self.global_step, self.summary_op, self.model.log_prob]
        if self.config.method in ['mixture_kfac', 'svgd_kfac']:
            fetch += [self.model.cov_update_step]
        if self.config.method == 'pSGLD':
            fetch += [self.model.moment_op]
        fetch += [self.train_op]

        fetch_values = self.session.run(
            fetch, feed_dict=self.model.get_feed_dict(batch_chunk, step))
        [step, summary, log_prob] = fetch_values[:3]

        _end_time = time.time()
        return step, summary, log_prob, (_end_time - _start_time)

    def log_step_message(self, step, log_prob, step_time, is_train=True):
        if step_time == 0:
            step_time = 0.001
        log_fn = (is_train and log.info or log.infov)
        log_fn((" [{split_mode:5s} step {step:4d}] " +
                "log_prob: {log_prob:.4f} " +
                "({sec_per_batch:.3f} sec/batch)").format(
                    split_mode=(is_train and 'train' or 'val'),
                    step=step,
                    log_prob=log_prob,
                    sec_per_batch=step_time,
                ))
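# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): `get_lik_and_acc` above
# averages per-batch means, which is exact only when every batch has the same
# size. A size-weighted variant, shown with a hypothetical `score_batch`
# callable, handles a ragged final batch; plain Python/NumPy only.
# ---------------------------------------------------------------------------
def batched_mean(X, y, score_batch, batch_size=2000):
    """Average a per-example metric over minibatches, weighting by batch size.

    score_batch(X_batch, y_batch) is assumed to return the mean metric over
    that batch (e.g. mean log-likelihood or accuracy).
    """
    n = len(X)
    total, count = 0.0, 0
    for start in range(0, n, batch_size):
        end = min(start + batch_size, n)
        total += score_batch(X[start:end], y[start:end]) * (end - start)
        count += end - start
    return total / count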
# Trainer for energy-based anomaly detection on KDD-Cup99 with an SVGD
# mixture model; reports accuracy, precision, recall, and F-score.
class Trainer(object):

    def optimize_adagrad(self, train_grads, train_vars, lr=1e-2):
        # RMSProp with gradient clipping
        optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.9)
        clip_grads = [(tf.clip_by_norm(grad, 5), var)
                      for grad, var in zip(train_grads, train_vars)]
        train_op = optimizer.apply_gradients(clip_grads)
        return train_op

    def optimize_adam(self, train_grads, train_vars, lr=1e-2):
        optimizer = tf.train.AdamOptimizer(lr)
        clip_grads = [(tf.clip_by_norm(grad, 5), var)
                      for grad, var in zip(train_grads, train_vars)]
        train_op = optimizer.apply_gradients(clip_grads)
        return train_op

    def __init__(self, config, session):
        self.config = config
        self.session = session

        self.filepath = '%s_%d_%s_%s_%d' % (
            config.method,
            config.n_components,
            config.kernel,
            repr(config.temperature),
            config.seed,
        )
        self.res_dir = './results/%s' % self.filepath

        import glob
        for folder in [self.res_dir]:
            if not os.path.exists(folder):
                os.makedirs(folder)
            # clean results folder
            if self.config.clean:
                files = glob.glob(folder + '/*')
                for f in files:
                    os.remove(f)

        if self.config.method == 'svgd':
            from model_svgd import SVGD
            self.model = SVGD(config)
        else:
            raise NotImplementedError

        # --- optimizer ---
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=4000,
                decay_rate=0.8,
                staircase=True,
                name='decaying_learning_rate')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.res_dir)
        self.checkpoint_secs = 300  # 5 min

        self.train_op = self.optimize_adagrad(self.model.train_grads,
                                              self.model.train_vars,
                                              lr=self.learning_rate)

        tf.global_variables_initializer().run()

        if config.checkpoint is not None:
            self.ckpt_path = tf.train.latest_checkpoint(self.config.checkpoint)
            if self.ckpt_path is not None:
                log.info("Checkpoint path: %s", self.ckpt_path)
                self.saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrained parameters from the provided checkpoint path")

    def evaluate(self, x_train, y_train, x_test, y_test):

        def _compute_energy(X):
            energy = []
            n_x = len(X)
            max_batches = n_x // self.config.batch_size
            if n_x % self.config.batch_size != 0:
                max_batches += 1
            for x_batch in tqdm(iter_data(X, size=self.config.batch_size),
                                total=max_batches):
                energy.append(self.session.run(
                    self.model.energy,
                    feed_dict=self.model.get_feed_dict(x_batch)))
            return np.concatenate(energy)

        eng_train = _compute_energy(x_train)
        eng_test = _compute_energy(x_test)
        assert len(eng_train) == len(x_train) and len(eng_test) == len(x_test), \
            'double check'

        # flag test points above the 80th percentile of the pooled energies
        combined_energy = np.concatenate((eng_train, eng_test))
        thresh = np.percentile(combined_energy, 100 - 20)
        pred = (eng_test > thresh).astype(int)
        gt = y_test.astype(int)

        accuracy = accuracy_score(gt, pred)
        precision, recall, f_score, support = prf(gt, pred, average='binary')
        print("Seed : {:3d}, Accuracy : {:0.4f}, Precision : {:0.4f}, "
              "Recall : {:0.4f}, F-score : {:0.4f}".format(
                  self.config.seed, accuracy, precision, recall, f_score))
        return accuracy, precision, recall, f_score

    def train(self):
        log.infov("Training Starts!")
        self.session.run(self.global_step.assign(0))  # reset global step

        from data_loader import load_kdd99
        x_train, x_test, y_train, y_test = load_kdd99('kdd_cup.npz',
                                                      self.config.seed)

        n_updates = 0
        with open(self.res_dir + "/step.txt", 'w') as f:
            for e in range(1, 1 + self.config.n_epochs):
                x_train, y_train = shuffle(x_train, y_train)
                n_train = len(x_train)
                # note: a trailing partial batch is skipped
                max_batches = n_train // self.config.batch_size
                for x_batch, y_batch in tqdm(iter_data(x_train, y_train,
                                                       size=self.config.batch_size),
                                             total=max_batches):
                    step, summary, loss, step_time = self.run_single_step(x_batch)
                    self.summary_writer.add_summary(summary, global_step=n_updates)
                    n_updates += 1

                if e % 10 == 0:
                    accuracy, precision, recall, f_score = self.evaluate(
                        x_train, y_train, x_test, y_test)
                    f.write(self.filepath + ',' + repr(e) + ',' +
                            repr(accuracy) + ',' + repr(precision) + ',' +
                            repr(recall) + ',' + repr(f_score) + '\n')
                    f.flush()

        # save model at the end
        self.saver.save(self.session,
                        os.path.join(self.res_dir, 'model'),
                        global_step=step)

    def run_single_step(self, x_batch):
        _start_time = time.time()
        fetch = [self.global_step, self.summary_op, self.model.loss, self.train_op]
        fetch_values = self.session.run(
            fetch, feed_dict=self.model.get_feed_dict(x_batch))
        [step, summary, loss] = fetch_values[:3]
        _end_time = time.time()
        return step, summary, loss, (_end_time - _start_time)

    def log_step_message(self, step, loss, step_time, is_train=True):
        if step_time == 0:
            step_time = 0.001
        log_fn = (is_train and log.info or log.infov)
        log_fn((" [{split_mode:5s} step {step:4d}] " +
                "loss: {loss:.4f} " +
                "({sec_per_batch:.3f} sec/batch)").format(
                    split_mode=(is_train and 'train' or 'val'),
                    step=step,
                    loss=loss,
                    sec_per_batch=step_time,
                ))
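# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original code): the percentile-threshold
# decision rule used by `evaluate` above, isolated as a standalone function.
# The function name and the `anomaly_fraction` parameter are assumptions;
# requires NumPy and scikit-learn.
# ---------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def threshold_by_energy(train_energy, test_energy, y_test, anomaly_fraction=0.2):
    """Flag the highest-energy test points as anomalies.

    The threshold is the (1 - anomaly_fraction) percentile of the pooled
    train + test energies, matching the 80th-percentile rule above.
    """
    pooled = np.concatenate((train_energy, test_energy))
    thresh = np.percentile(pooled, 100 * (1 - anomaly_fraction))
    pred = (test_energy > thresh).astype(int)
    acc = accuracy_score(y_test, pred)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test, pred, average='binary')
    return acc, precision, recall, f1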