def __init__(self, config, exp_name='new_exp', loss_mode='1'):
    self.config = config
    self.graph = tf.Graph()
    gpu_options = tf.GPUOptions(allow_growth=True)
    configProto = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
    self.exp_name = exp_name
    self.loss_mode = loss_mode

    train_data_file = config.train_data_file
    valid_data_file = config.valid_data_file
    test_data_file = config.test_data_file
    train_stud_data_file = config.train_stud_data_file
    self.train_dataset = Dataset()
    self.train_dataset.load_npy(train_data_file)
    self.valid_dataset = Dataset()
    self.valid_dataset.load_npy(valid_data_file)
    self.test_dataset = Dataset()
    self.test_dataset.load_npy(test_data_file)
    self.train_stud_dataset = Dataset()
    self.train_stud_dataset.load_npy(train_stud_data_file)

    self.reset()
    self._build_placeholder()
    self._build_graph()
    self.reward_baseline = None   # average reward over episodes
    self.improve_baseline = None  # average improvement over steps
def _load_dataset(self):
    conf = self.config.data
    train_c_data_file = conf.train_c_data_file
    valid_c_data_file = conf.valid_c_data_file
    train_t_data_file = conf.train_t_data_file
    valid_t_data_file = conf.valid_t_data_file
    test_data_file = conf.test_data_file
    self.train_c_dataset = Dataset()
    self.train_c_dataset.load_npy(train_c_data_file)
    self.valid_c_dataset = Dataset()
    self.valid_c_dataset.load_npy(valid_c_data_file)
    self.train_t_dataset = Dataset()
    self.train_t_dataset.load_npy(train_t_data_file)
    self.valid_t_dataset = Dataset()
    self.valid_t_dataset.load_npy(valid_t_data_file)
    self.test_dataset = Dataset()
    self.test_dataset.load_npy(test_data_file)
def __init__(self, config, exp_name='new_exp_gan_grid'):
    self.config = config
    self.graph = tf.Graph()
    self.exp_name = exp_name
    gpu_options = tf.GPUOptions(allow_growth=True)
    configProto = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
    # ----Loss_mode is only for DEBUG usage.----
    self.train_dataset = Dataset()
    self.train_dataset.load_npy(config.train_data_file)
    self.valid_dataset = Dataset()
    self.valid_dataset.load_npy(config.valid_data_file)
    self._build_placeholder()
    self._build_graph()
    self.final_hq_baseline = None  # average reward over episodes
    self.reset()
    self.fixed_noise_10000 = np.random.normal(
        size=(10000, config.dim_z)).astype('float32')
def _load_datasets(self):
    config = self.config
    train_ctrl_data_file = os.path.join(config.data_dir,
                                        config.train_ctrl_data_file)
    train_task_data_file = os.path.join(config.data_dir,
                                        config.train_task_data_file)
    valid_ctrl_data_file = os.path.join(config.data_dir,
                                        config.valid_ctrl_data_file)
    valid_task_data_file = os.path.join(config.data_dir,
                                        config.valid_task_data_file)
    test_data_file = os.path.join(config.data_dir, config.test_data_file)
    self.train_ctrl_dataset = Dataset()
    self.train_ctrl_dataset.load_npy(train_ctrl_data_file)
    self.valid_ctrl_dataset = Dataset()
    self.valid_ctrl_dataset.load_npy(valid_ctrl_data_file)
    self.train_task_dataset = Dataset()
    self.train_task_dataset.load_npy(train_task_data_file)
    self.valid_task_dataset = Dataset()
    self.valid_task_dataset.load_npy(valid_task_data_file)
    self.test_dataset = Dataset()
    self.test_dataset.load_npy(test_data_file)
def get_inception_score(mnist_model, images, splits=10):
    bs = 100
    preds = []
    total_samples = images.shape[0]
    n_batches = int(math.ceil(float(total_samples) / float(bs)))
    for i in range(n_batches):
        input = images[(i * bs):min((i + 1) * bs, total_samples)]
        target = np.zeros([input.shape[0], 10])
        dataset = Dataset()
        dataset.build_from_data(input, target)
        fetch = [mnist_model.softmax]
        pred = mnist_model.valid(dataset, fetch)
        preds.append(pred[0])
    preds = np.concatenate(preds, 0)
    scores = []
    for i in range(splits):
        part = preds[(i * preds.shape[0] // splits):
                     ((i + 1) * preds.shape[0] // splits), :]
        kl = part * (np.log(part) -
                     np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, 1))
        scores.append(np.exp(kl))
    return np.mean(scores), np.std(scores)
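# Illustrative sketch (not part of the original code): the function above
# computes the standard Inception-score estimate, exp(E_x[KL(p(y|x) || p(y))]),
# from a classifier's softmax outputs, averaged over `splits` folds. The toy
# below reproduces just the KL arithmetic on random predictions so the scoring
# step can be checked in isolation; `fake_preds` is made-up data, not output
# of the MNIST model used in the repo.
import numpy as np

def inception_score_from_preds(preds, splits=10):
    scores = []
    for i in range(splits):
        part = preds[i * preds.shape[0] // splits:
                     (i + 1) * preds.shape[0] // splits, :]
        # KL(p(y|x) || p(y)) per sample, then averaged and exponentiated
        kl = part * (np.log(part) - np.log(np.mean(part, 0, keepdims=True)))
        scores.append(np.exp(np.mean(np.sum(kl, 1))))
    return np.mean(scores), np.std(scores)

rng = np.random.RandomState(0)
logits = rng.normal(size=(1000, 10))
fake_preds = np.exp(logits) / np.exp(logits).sum(1, keepdims=True)
print(inception_score_from_preds(fake_preds))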
def __init__(self, config, exp_name='new_exp', loss_mode='1'):
    self.config = config
    self.graph = tf.Graph()
    gpu_options = tf.GPUOptions(allow_growth=True)
    configProto = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
    # ----Loss_mode is only for DEBUG usage.----
    # 0: only mse, 1: mse & l1
    self.loss_mode = loss_mode
    self.exp_name = exp_name

    train_data_file = config.train_data_file
    train_stud_data_file = config.train_stud_data_file
    valid_data_file = config.valid_data_file
    self.train_dataset = Dataset()
    self.train_dataset.load_npy(train_data_file)
    self.valid_dataset = Dataset()
    self.valid_dataset.load_npy(valid_data_file)
    self.train_stud_dataset = Dataset()
    self.train_stud_dataset.load_npy(train_stud_data_file)

    self.reset()
    self._build_placeholder()
    self._build_graph()
    self.reward_baseline = None
    self.improve_baseline = None
class Cls(Basic_model): ''' Public variables (all task models should have these public variables): self.extra_info self.checkpoint_dir self.best_performance self.test_dataset ''' def __init__(self, config, exp_name='new_exp'): super(Cls, self).__init__(config, exp_name) self.reset() self._load_datasets() self._build_placeholder() self._build_graph() self.reward_baseline = None # average reward over episodes self.improve_baseline = None # averge improvement over steps def _load_datasets(self): config = self.config train_ctrl_data_file = os.path.join(config.data_dir, config.train_ctrl_data_file) train_task_data_file = os.path.join(config.data_dir, config.train_task_data_file) valid_ctrl_data_file = os.path.join(config.data_dir, config.valid_ctrl_data_file) valid_task_data_file = os.path.join(config.data_dir, config.valid_task_data_file) test_data_file = os.path.join(config.data_dir, config.test_data_file) self.train_ctrl_dataset = Dataset() self.train_ctrl_dataset.load_npy(train_ctrl_data_file) self.valid_ctrl_dataset = Dataset() self.valid_ctrl_dataset.load_npy(valid_ctrl_data_file) self.train_task_dataset = Dataset() self.train_task_dataset.load_npy(train_task_data_file) self.valid_task_dataset = Dataset() self.valid_task_dataset.load_npy(valid_task_data_file) self.test_dataset = Dataset() self.test_dataset.load_npy(test_data_file) def reset(self): # ----Reset the model.---- # TODO(haowen) The way to carry step number information should be # reconsiderd self.step_number = [0] self.previous_ce_loss = [0] * self.config.num_pre_loss self.previous_l1_loss = [0] * self.config.num_pre_loss self.previous_valid_acc = [0] * self.config.num_pre_loss self.previous_train_acc = [0] * self.config.num_pre_loss self.previous_valid_loss = [0] * self.config.num_pre_loss self.previous_train_loss = [0] * self.config.num_pre_loss self.mag_ce_grad = 0 self.mag_l1_grad = 0 # to control when to terminate the episode self.endurance = 0 self.collapse = False # The bigger the performance is, the better. In this case, performance # is accuracy. Naming it as performance in order to be compatible with # other tasks. 
self.best_performance = -1e10 self.improve_baseline = None def _build_placeholder(self): x_size = self.config.dim_input_task with self.graph.as_default(): self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32) self.y_plh = tf.placeholder(shape=[None], dtype=tf.int32) def _build_graph(self): x_size = self.config.dim_input_task h_size = self.config.dim_hidden_task y_size = self.config.dim_output_task lr = self.config.lr_task with self.graph.as_default(): w1 = weight_variable([x_size, h_size], name='w1') b1 = bias_variable([h_size], name='b1') hidden = tf.nn.relu(tf.matmul(self.x_plh, w1) + b1) w2 = weight_variable([h_size, y_size], name='w2') b2 = bias_variable([y_size], name='b2') self.pred = tf.matmul(hidden, w2) + b2 # define loss # cross entropy loss self.loss_ce = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=self.y_plh, logits=self.pred, name='loss')) y_ = tf.argmax(self.pred, 1, output_type=tf.int32) correct_prediction = tf.equal(y_, self.y_plh) self.correct_prediction = correct_prediction self.accuracy = tf.reduce_mean( tf.cast(correct_prediction, tf.float32)) tvars = tf.trainable_variables() self.tvars = tvars l1_regularizer = tf.contrib.layers.l1_regularizer( scale=self.config.lambda_task, scope=None) self.loss_l1 = tf.contrib.layers.apply_regularization( l1_regularizer, tvars) self.loss_total = self.loss_ce + self.loss_l1 # ----Define update operation.---- optimizer = tf.train.AdamOptimizer(lr) ce_gvs = optimizer.compute_gradients(self.loss_ce, tvars) l1_gvs = optimizer.compute_gradients(self.loss_l1, tvars) total_gvs = optimizer.compute_gradients(self.loss_total, tvars) self.update_ce = optimizer.apply_gradients(ce_gvs) self.update_l1 = optimizer.apply_gradients(l1_gvs) self.update_total = optimizer.apply_gradients(total_gvs) self.update = [self.update_ce, self.update_l1] self.init = tf.global_variables_initializer() self.saver = tf.train.Saver() self.ce_grad = [grad for grad, _ in ce_gvs] self.l1_grad = [grad for grad, _ in l1_gvs] def valid(self, dataset=None): if not dataset: if self.config.args.task_mode == 'train': dataset = self.valid_ctrl_dataset elif self.config.args.task_mode == 'test': dataset = self.valid_task_dataset elif self.config.args.task_mode == 'baseline': dataset = self.valid_task_dataset else: logger.exception('Unexcepted task_mode: {}'.\ format(self.config.args.task_mode)) data = dataset.next_batch(dataset.num_examples) x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} fetch = [self.loss_ce, self.accuracy, self.pred, self.y_plh] [loss_ce, acc, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict) return loss_ce, acc, pred, gdth def response(self, action): """ Given an action, return the new state, reward and whether dead Args: action: one hot encoding of actions Returns: state: shape = [dim_input_ctrl] reward: shape = [1] dead: boolean """ if self.config.args.task_mode == 'train': dataset = self.train_ctrl_dataset elif self.config.args.task_mode == 'test': dataset = self.train_task_dataset elif self.config.args.task_mode == 'baseline': dataset = self.train_task_dataset else: logger.exception('Unexcepted mode: {}'.\ format(self.config.args.task_mode)) data = dataset.next_batch(self.config.batch_size) sess = self.sess x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} a = np.argmax(np.array(action)) sess.run(self.update[a], feed_dict=feed_dict) fetch = [ self.loss_ce, self.loss_l1, self.accuracy, self.ce_grad, self.l1_grad ] loss_ce, loss_l1, acc, ce_grad, 
l1_grad = sess.run(fetch, feed_dict=feed_dict) valid_loss, valid_acc, _, _ = self.valid() train_loss, train_acc = loss_ce, acc # ----Update state.---- self.previous_ce_loss = self.previous_ce_loss[1:] + [loss_ce.tolist()] self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()] self.step_number[0] += 1 self.previous_valid_loss = self.previous_valid_loss[1:]\ + [valid_loss.tolist()] self.previous_train_loss = self.previous_train_loss[1:]\ + [train_loss.tolist()] self.previous_valid_acc = self.previous_valid_acc[1:]\ + [valid_acc.tolist()] self.previous_train_acc = self.previous_train_acc[1:]\ + [train_acc.tolist()] self.mag_ce_grad = self.get_grads_magnitude(ce_grad) self.mag_l1_grad = self.get_grads_magnitude(l1_grad) # Public variable self.extra_info = { 'valid_loss': valid_loss, 'train_loss': train_loss, 'valid_acc': valid_acc, 'train_acc': train_acc } reward = self.get_step_reward() # ----Early stop and record best result.---- dead = self.check_terminate() state = self.get_state() return state, reward, dead def check_terminate(self): # TODO(haowen) # Early stop and recording the best result # Episode terminates on two condition: # 1) Convergence: valid loss doesn't improve in endurance steps # 2) Collapse: action space collapse to one action (not implement yet) step = self.step_number[0] if step % self.config.valid_frequency_task == 0: self.endurance += 1 loss, acc, _, _ = self.valid() if acc > self.best_performance: self.best_step = self.step_number[0] self.best_performance = acc self.endurance = 0 if not self.config.args.task_mode == 'train': self.save_model(step, mute=True) if step > self.config.max_training_step: return True if self.config.stop_strategy_task == 'exceeding_endurance' and \ self.endurance > self.config.max_endurance_task: return True return False def get_step_reward(self): # TODO(haowen) Use the decrease of validation loss as step reward if self.improve_baseline is None: # ----First step, nothing to comparing with.---- improve = 0.1 else: improve = (self.previous_valid_loss[-2] - self.previous_valid_loss[-1]) # TODO(haowen) Try to use sqrt function instead of sign function # ----With baseline.---- if self.improve_baseline is None: self.improve_baseline = improve decay = self.config.reward_baseline_decay self.improve_baseline = decay * self.improve_baseline\ + (1 - decay) * improve value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5)) #value = abs(improve) / (abs(self.improve_baseline) + 1e-5) value = min(value, self.config.reward_max_value) return math.copysign(value, improve) * self.config.reward_step_ctrl def get_final_reward(self): ''' Return: reward: real reward adv: advantage, subtract the baseline from reward and then normalize it ''' assert self.best_performance > -1e10 + 1 acc = max(self.best_performance, 1 / self.config.dim_output_task) reward = -self.config.reward_c / acc # Calculate baseline if self.reward_baseline is None: self.reward_baseline = reward else: decay = self.config.reward_baseline_decay self.reward_baseline = decay * self.reward_baseline\ + (1 - decay) * reward # Calculate advantage adv = reward - self.reward_baseline adv = min(adv, self.config.reward_max_value) adv = max(adv, -self.config.reward_max_value) return reward, adv def get_state(self): abs_diff = [] rel_diff = [] if self.improve_baseline is None: ib = 1 else: ib = self.improve_baseline # relative difference between valid_loss and train_loss v = self.previous_valid_loss[-1] t = self.previous_train_loss[-1] abs_diff = v - t if t > 1e-6: rel_diff = (v - t) 
/ t else: rel_diff = 0 state0 = rel_diff # normalized baseline improvement state1 = 1 + math.log(abs(ib) + 1e-4) / 10 # normalized mse ce_loss and l1_loss: ce_loss = self.previous_ce_loss[-1] state2 = ce_loss l1_loss = self.previous_l1_loss[-1] state3 = l1_loss # train_acc, valid_acc and their difference if self.previous_valid_acc[-1] == 0: state4 = 0 else: state4 = (self.previous_train_acc[-1] - self.previous_valid_acc[-1]) /\ self.previous_valid_acc[-1] # difference between magnitude of ce gradient and magnitude of l1 # gradient if self.mag_l1_grad == 0: state5 = 0 else: state5 = (self.mag_ce_grad - self.mag_l1_grad) / self.mag_l1_grad state = [state0, state1, state2, state3, state4, state5] return np.array(state, dtype='f')
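# Illustrative sketch (hypothetical, not from the repo): the task models above
# expose an RL-style interface -- reset(), response(one_hot_action) returning
# (state, reward, dead), and get_final_reward() returning (reward, advantage).
# The mock below imitates that interface with random numbers and drives it
# with a uniform random controller, just to show how an episode loop would
# consume it; the class and all values here are made up for illustration.
import numpy as np

class MockTask(object):
    dim_action = 2   # e.g. {update CE/MSE loss, update L1 loss}
    dim_state = 6

    def reset(self):
        self.step = 0

    def response(self, action):
        self.step += 1
        state = np.random.randn(self.dim_state).astype('f')
        reward = float(np.random.randn() * 0.01)   # step reward
        dead = self.step >= 50                     # episode ends after 50 steps
        return state, reward, dead

    def get_final_reward(self):
        reward = float(np.random.rand())
        adv = reward - 0.5                         # reward minus a baseline
        return reward, adv

task = MockTask()
task.reset()
dead = False
while not dead:
    a = np.eye(task.dim_action)[np.random.randint(task.dim_action)]
    state, reward, dead = task.response(a)
print(task.get_final_reward())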
class Reg(Basic_model): ''' Public variables (all task models should have these public variables): self.extra_info self.checkpoint_dir self.best_performance self.test_dataset ''' def __init__(self, config, exp_name='new_exp'): super(Reg, self).__init__(config, exp_name) self.reset() self._load_datasets() self._build_placeholder() self._build_graph() self.reward_baseline = None self.improve_baseline = None def _load_datasets(self): config = self.config train_ctrl_data_file = os.path.join(config.data_dir, config.train_ctrl_data_file) train_task_data_file = os.path.join(config.data_dir, config.train_task_data_file) valid_ctrl_data_file = os.path.join(config.data_dir, config.valid_ctrl_data_file) valid_task_data_file = os.path.join(config.data_dir, config.valid_task_data_file) test_data_file = os.path.join(config.data_dir, config.test_data_file) self.train_ctrl_dataset = Dataset() self.train_ctrl_dataset.load_npy(train_ctrl_data_file) self.valid_ctrl_dataset = Dataset() self.valid_ctrl_dataset.load_npy(valid_ctrl_data_file) self.train_task_dataset = Dataset() self.train_task_dataset.load_npy(train_task_data_file) self.valid_task_dataset = Dataset() self.valid_task_dataset.load_npy(valid_task_data_file) self.test_dataset = Dataset() self.test_dataset.load_npy(test_data_file) def reset(self): """ Reset the model """ # TODO(haowen) The way to carry step number information should be # reconsiderd self.step_number = [0] self.previous_mse_loss = [0] * self.config.num_pre_loss self.previous_l1_loss = [0] * self.config.num_pre_loss self.previous_valid_loss = [0] * self.config.num_pre_loss self.previous_train_loss = [0] * self.config.num_pre_loss self.mag_mse_grad = 0 self.mag_l1_grad = 0 # to control when to terminate the episode self.endurance = 0 # The bigger, the better. In this case, performance is negative loss. # Naming it as performance in order to be compatible with other tasks. self.best_performance = -1e10 self.collapse = False self.improve_baseline = None def _build_placeholder(self): x_size = self.config.dim_input_task with self.graph.as_default(): self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32) self.y_plh = tf.placeholder(shape=[None], dtype=tf.float32) def _build_graph(self): y_size = self.config.dim_output_task lr = self.config.lr_task with self.graph.as_default(): # ----quadratic equation---- # ---first order--- x_size = self.config.dim_input_task initial = tf.random_normal(shape=[x_size, 1], stddev=0.1, seed=1) w1 = tf.Variable(initial) sum1 = tf.matmul(self.x_plh, w1) # ---second order--- initial = tf.random_normal(shape=[x_size, x_size], stddev=0.01, seed=1) w2 = tf.Variable(initial) xx = tf.matmul(tf.reshape(self.x_plh, [-1, x_size, 1]), tf.reshape(self.x_plh, [-1, 1, x_size])) sum2 = tf.matmul(tf.reshape(xx, [-1, x_size * x_size]), tf.reshape(w2, [x_size * x_size, 1])) # NOTE(Haowen): Divide by 10 is important here to promise # convergence. It is some kind of cheating but we think it is fine # as a demo task. self.pred = sum1 + sum2 / 10 # Only for debug self.w1 = w1 self.w2 = w2 # define loss self.loss_mse = tf.reduce_mean( tf.square(tf.squeeze(self.pred) - self.y_plh)) # NOTE(Haowen): Somehow the l1 regularizers provided by tf # provide a better performance than self-designed regularizers # showing in the flowing 6 lines. 
#self.loss_l1 = self.config.lambda_task * ( # tf.reduce_sum(tf.reduce_sum(tf.abs(w2)))\ # + tf.reduce_sum(tf.reduce_sum(tf.abs(w1)))) tvars = tf.trainable_variables() l1_regularizer = tf.contrib.layers.l1_regularizer( scale=self.config.lambda_task, scope=None) self.loss_l1 = tf.contrib.layers.apply_regularization( l1_regularizer, tvars) self.loss_total = self.loss_mse + self.loss_l1 # ----Define update operation.---- optimizer = tf.train.AdamOptimizer(lr) mse_gvs = optimizer.compute_gradients(self.loss_mse, tvars) l1_gvs = optimizer.compute_gradients(self.loss_l1, tvars) total_gvs = optimizer.compute_gradients(self.loss_total, tvars) self.update_mse = optimizer.apply_gradients(mse_gvs) self.update_l1 = optimizer.apply_gradients(l1_gvs) self.update_total = optimizer.apply_gradients(total_gvs) self.update = [self.update_mse, self.update_l1] self.init = tf.global_variables_initializer() self.saver = tf.train.Saver() self.mse_grad = [grad for grad, _ in mse_gvs] self.l1_grad = [grad for grad, _ in l1_gvs] def valid(self, dataset=None): if not dataset: if self.config.args.task_mode == 'train': dataset = self.valid_ctrl_dataset elif self.config.args.task_mode == 'test': dataset = self.valid_task_dataset elif self.config.args.task_mode == 'baseline': dataset = self.valid_task_dataset else: logger.exception('Unexcepted task_mode: {}'.\ format(self.config.args.task_mode)) data = dataset.next_batch(dataset.num_examples) x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} fetch = [self.loss_mse, self.pred, self.y_plh] [loss_mse, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict) return loss_mse, pred, gdth def response(self, action): """ Given an action, return the new state, reward and whether dead Args: action: one hot encoding of actions Returns: state: shape = [dim_input_ctrl] reward: shape = [1] dead: boolean """ if self.config.args.task_mode == 'train': dataset = self.train_ctrl_dataset elif self.config.args.task_mode == 'test': dataset = self.train_task_dataset elif self.config.args.task_mode == 'baseline': dataset = self.train_task_dataset else: logger.exception('Unexcepted mode: {}'.\ format(self.config.args.task_mode)) data = dataset.next_batch(self.config.batch_size) sess = self.sess x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} a = np.argmax(np.array(action)) sess.run(self.update[a], feed_dict=feed_dict) fetch = [self.loss_mse, self.loss_l1, self.mse_grad, self.l1_grad] loss_mse, loss_l1, mse_grad, l1_grad = sess.run(fetch, feed_dict=feed_dict) valid_loss, _, _ = self.valid() train_loss = loss_mse # ----Update state.---- self.previous_mse_loss = self.previous_mse_loss[1:] + [ loss_mse.tolist() ] self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()] self.step_number[0] += 1 self.previous_valid_loss = self.previous_valid_loss[1:]\ + [valid_loss.tolist()] self.previous_train_loss = self.previous_train_loss[1:]\ + [train_loss.tolist()] self.mag_mse_grad = self.get_grads_magnitude(mse_grad) self.mag_l1_grad = self.get_grads_magnitude(l1_grad) # Public variable self.extra_info = { 'valid_loss': self.previous_valid_loss[-1], 'train_loss': self.previous_train_loss[-1] } reward = self.get_step_reward() # ----Early stop and record best result.---- dead = self.check_terminate() state = self.get_state() return state, reward, dead def check_terminate(self): # TODO(haowen) # Episode terminates on two condition: # 1) Convergence: valid loss doesn't improve in endurance steps # 2) Collapse: action space collapse to one 
action (not implement yet) # Return True if the training should stop, otherwise return False step = self.step_number[0] if step % self.config.valid_frequency_task == 0: self.endurance += 1 loss, _, _ = self.valid() if -loss > self.best_performance: self.best_step = self.step_number[0] self.best_performance = -loss self.endurance = 0 if not self.config.args.task_mode == 'train': self.save_model(step, mute=True) if step > self.config.max_training_step: return True if self.config.stop_strategy_task == 'exceeding_endurance' and \ self.endurance > self.config.max_endurance_task: return True return False def get_step_reward(self): # TODO(haowen) Use the decrease of validation loss as step reward if self.improve_baseline is None: # ----First step, nothing to compare with.---- improve = 0.1 else: improve = (self.previous_valid_loss[-2] - self.previous_valid_loss[-1]) # TODO(haowen) Try to use sqrt function instead of sign function if self.improve_baseline is None: self.improve_baseline = improve decay = self.config.reward_baseline_decay self.improve_baseline = decay * self.improve_baseline\ + (1 - decay) * improve value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5)) #value = abs(improve) / (abs(self.improve_baseline) + 1e-5) value = min(value, self.config.reward_max_value) return math.copysign(value, improve) * self.config.reward_step_ctrl def get_final_reward(self): ''' Return: reward: real reward adv: advantage, subtract the baseline from reward and then normalize it ''' assert self.best_performance > -1e10 + 1 loss_mse = -self.best_performance reward = self.config.reward_c / loss_mse # Calculate baseline if self.reward_baseline is None: self.reward_baseline = reward else: decay = self.config.reward_baseline_decay self.reward_baseline = decay * self.reward_baseline\ + (1 - decay) * reward # Calculate advantage adv = reward - self.reward_baseline adv = min(adv, self.config.reward_max_value) adv = max(adv, -self.config.reward_max_value) return reward, adv def get_state(self): abs_diff = [] rel_diff = [] if self.improve_baseline is None: ib = 1 else: ib = self.improve_baseline # relative difference between valid_loss and train_loss: # we only consider the latest step v = self.previous_valid_loss[-1] t = self.previous_train_loss[-1] abs_diff = v - t if t > 1e-6: rel_diff = (v - t) / t else: rel_diff = 0 state0 = rel_diff # normalized baseline improvement state1 = 1 + math.log(abs(ib) + 1e-5) / 12 # normalized mse loss mse_loss = self.previous_mse_loss[-1] state2 = min(1, mse_loss / 20) - 0.5 # normalized l1 loss l1_loss = self.previous_l1_loss[-1] state3 = l1_loss - 1 # difference between magnitude of mse gradient and magnitude of l1 # gradient state4 = self.mag_mse_grad - self.mag_l1_grad state = [state0, state1, state2, state3, state4] return np.array(state, dtype='f')
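# Illustrative sketch (standalone; decay=0.8, r_max=20 and c=0.1 are
# placeholder constants, not the repo's config values): get_step_reward()
# above rewards a drop in validation loss, scaled by a running baseline of
# recent improvements, i.e.
#     r_t = sign(improve) * min(sqrt(|improve| / (|baseline| + 1e-5)), r_max) * c
# where the baseline is an exponential moving average of past improvements.
import math

def step_reward(improve, baseline, decay=0.8, r_max=20.0, c=0.1):
    baseline = improve if baseline is None else \
        decay * baseline + (1 - decay) * improve
    value = math.sqrt(abs(improve) / (abs(baseline) + 1e-5))
    value = min(value, r_max)
    return math.copysign(value, improve) * c, baseline

r1, b = step_reward(improve=0.05, baseline=None)
r2, b = step_reward(improve=-0.01, baseline=b)
print(r1, r2)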
def main():
    config_path = os.path.join(root_path, 'config/skirt_length.cfg')
    config = Parser(config_path)
    num_classes = 6
    dataset = Dataset(config, tf.estimator.ModeKeys.TRAIN)
    dataset_valid = Dataset(config, tf.estimator.ModeKeys.EVAL)
    dataset_test = Dataset(config, tf.estimator.ModeKeys.PREDICT)
    tf.reset_default_graph()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95,
                                allow_growth=True)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options, allow_soft_placement=True))
    model = Model(config, num_classes)
    model.load_pretrain(sess)
    saver = tf.train.Saver(max_to_keep=5)

    best_acc = 0
    best_loss = 100
    test_acc = 0
    test_loss = 0
    valid_acc = 0
    valid_loss = 0
    train_acc = 0
    train_loss = 0
    timedelay = 0
    steps = 0
    batch_size = config.batch_size
    start_time = time.time()
    valid_samples = dataset_valid.next_batch(batch_size)

    while (timedelay < config.timedelay_num) and (steps < config.max_step):
        samples = dataset.next_batch(batch_size)
        steps += 1
        model.train_one_step(sess, samples)
        if steps % config.summary_steps == 0:
            train_loss, train_results = model.test_by_batch(sess, samples)
            train_acc = evaluate_metrics(train_results)
            valid_loss, valid_results = model.test_by_batch(sess,
                                                            valid_samples)
            valid_acc = evaluate_metrics(valid_results)
            if best_loss > valid_loss or best_acc < valid_acc:
                timedelay = 0
                saver.save(sess,
                           os.path.join(config.exp_dir, 'E01/fashionAI'),
                           global_step=steps)
            else:
                timedelay += 1
            if best_acc < valid_acc:
                best_acc = valid_acc
            if best_loss > valid_loss:
                best_loss = valid_loss
            sys.stdout.write('\nBatches: %d' % steps)
            sys.stdout.write('\nBatch Time: %4fs' % (
                1.0 * (time.time() - start_time) / config.summary_steps))
            sys.stdout.write('\nTrain acc: %.6f' % train_acc)
            sys.stdout.write('\tTrain Loss: %.6f' % train_loss)
            sys.stdout.write('\nValid acc: %.6f' % valid_acc)
            sys.stdout.write('\tValid Loss: %.6f' % valid_loss)
            sys.stdout.write('\nBest acc: %.6f' % best_acc)
            sys.stdout.write('\tBest Loss: %.6f' % best_loss)
            sys.stdout.write('\n\n')
            start_time = time.time()
    print('\nModel saved at {}'.format(config.exp_dir))
def _load_datasets(self):
    config = self.config
    self.train_datasets = []
    self.valid_datasets = []
    self.test_datasets = []
    for task_num, task in enumerate(config.task_names):
        train_datasets = Dataset()
        valid_datasets = Dataset()
        test_datasets = Dataset()
        train_datasets.load_json(config.train_data_files[task_num])
        valid_datasets.load_json(config.valid_data_files[task_num])
        test_datasets.load_json(config.test_data_files[task_num])
        # Only use 70% of each dataset to speed up training.
        train_datasets.resize(int(train_datasets._num_examples * 0.7),
                              shuffle=True)
        valid_datasets.resize(int(valid_datasets._num_examples * 0.7),
                              shuffle=True)
        test_datasets.resize(int(test_datasets._num_examples * 0.7),
                             shuffle=True)
        self.train_datasets.append(train_datasets)
        self.valid_datasets.append(valid_datasets)
        self.test_datasets.append(test_datasets)
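# Illustrative sketch (an assumption, not the repo's actual Dataset class):
# the loaders above rely on a Dataset object with load_npy()/load_json(),
# build_from_data(), resize(), num_examples and next_batch() returning a dict
# with 'input' and 'target'. A minimal in-memory version consistent with that
# usage could look like the following; the .npy layout assumed here (a pickled
# dict holding 'input'/'target' arrays) is a guess.
import numpy as np

class MinimalDataset(object):
    def build_from_data(self, inputs, targets):
        self._inputs = np.asarray(inputs)
        self._targets = np.asarray(targets)
        self._cursor = 0

    def load_npy(self, filename):
        data = np.load(filename, allow_pickle=True).item()
        self.build_from_data(data['input'], data['target'])

    @property
    def num_examples(self):
        return self._inputs.shape[0]

    def resize(self, n, shuffle=False):
        idx = np.random.permutation(self.num_examples) if shuffle \
            else np.arange(self.num_examples)
        self.build_from_data(self._inputs[idx[:n]], self._targets[idx[:n]])

    def next_batch(self, batch_size):
        if self._cursor + batch_size > self.num_examples:
            self._cursor = 0
        sl = slice(self._cursor, self._cursor + batch_size)
        self._cursor += batch_size
        return {'input': self._inputs[sl], 'target': self._targets[sl]}

ds = MinimalDataset()
ds.build_from_data(np.random.randn(100, 8), np.random.randn(100))
print(ds.next_batch(32)['input'].shape)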
class Reg(Basic_model): def __init__(self, config, exp_name='new_exp', loss_mode='1'): self.config = config self.graph = tf.Graph() gpu_options = tf.GPUOptions(allow_growth=True) configProto = tf.ConfigProto(gpu_options=gpu_options) self.sess = tf.InteractiveSession(config=configProto, graph=self.graph) # ----Loss_mode is only for DEBUG usage.---- # 0: only mse, 1: mse & l1 self.loss_mode = loss_mode self.exp_name = exp_name train_data_file = config.train_data_file train_stud_data_file = config.train_stud_data_file valid_data_file = config.valid_data_file self.train_dataset = Dataset() self.train_dataset.load_npy(train_data_file) self.valid_dataset = Dataset() self.valid_dataset.load_npy(valid_data_file) self.train_stud_dataset = Dataset() self.train_stud_dataset.load_npy(train_stud_data_file) self.reset() self._build_placeholder() self._build_graph() self.reward_baseline = None self.improve_baseline = None def reset(self): """ Reset the model """ # TODO(haowen) The way to carry step number information should be # reconsiderd self.step_number = [0] self.previous_mse_loss = [0] * self.config.num_pre_loss self.previous_l1_loss = [0] * self.config.num_pre_loss self.previous_action = [0, 0] self.previous_valid_loss = [0] * self.config.num_pre_loss self.previous_train_loss = [0] * self.config.num_pre_loss self.mag_mse_grad = 0 self.mag_l1_grad = 0 # to control when to terminate the episode self.endurance = 0 self.best_loss = 1e10 self.improve_baseline = None def _build_placeholder(self): x_size = self.config.dim_input_stud with self.graph.as_default(): self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32) self.y_plh = tf.placeholder(shape=[None], dtype=tf.float32) def _build_graph(self): h_size = self.config.dim_hidden_stud y_size = self.config.dim_output_stud lr = self.config.lr_stud with self.graph.as_default(): # ----quadratic equation---- # ---first order--- x_size = self.config.dim_input_stud initial = tf.random_normal(shape=[x_size, 1], stddev=0.1, seed=1) w1 = tf.Variable(initial) sum1 = tf.matmul(self.x_plh, w1) # ---second order--- initial = tf.random_normal(shape=[x_size, x_size], stddev=0.01, seed=1) w2 = tf.Variable(initial) xx = tf.matmul(tf.reshape(self.x_plh, [-1, x_size, 1]), tf.reshape(self.x_plh, [-1, 1, x_size])) sum2 = tf.matmul(tf.reshape(xx, [-1, x_size * x_size]), tf.reshape(w2, [x_size * x_size, 1])) # NOTE(Haowen): Divide by 10 is important here to promise # convergence. self.pred = sum1 + sum2 / 10 self.w1 = w1 self.w2 = w2 # define loss self.loss_mse = tf.reduce_mean( tf.square(tf.squeeze(self.pred) - self.y_plh)) # NOTE(Haowen): Somehow the l1 regularizers provided by tf # provide a better performance than self-designed regularizers # showing in the flowing 6 lines. 
#self.loss_l1 = self.config.lambda1_stud * ( # tf.reduce_sum(tf.reduce_sum(tf.abs(w2)))\ # + tf.reduce_sum(tf.reduce_sum(tf.abs(w1)))) tvars = tf.trainable_variables() l1_regularizer = tf.contrib.layers.l1_regularizer( scale=self.config.lambda1_stud, scope=None) self.loss_l1 = tf.contrib.layers.apply_regularization( l1_regularizer, tvars) if self.loss_mode == '0': self.loss_total = self.loss_mse print('mse loss') elif self.loss_mode == '1': self.loss_total = self.loss_mse + self.loss_l1 print('mse loss and l1 loss') print('lambda1:', self.config.lambda1_stud) else: raise NotImplementedError # ----Define update operation.---- optimizer = tf.train.AdamOptimizer(lr) mse_gvs = optimizer.compute_gradients(self.loss_mse, tvars) l1_gvs = optimizer.compute_gradients(self.loss_l1, tvars) total_gvs = optimizer.compute_gradients(self.loss_total, tvars) self.update_mse = optimizer.apply_gradients(mse_gvs) self.update_l1 = optimizer.apply_gradients(l1_gvs) self.update_total = optimizer.apply_gradients(total_gvs) self.init = tf.global_variables_initializer() self.mse_grad = [grad for grad, _ in mse_gvs] self.l1_grad = [grad for grad, _ in l1_gvs] def train(self): """ Optimize mse loss, l1 loss at the same time """ data = self.train_dataset.next_batch(self.config.batch_size) x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} loss, _ = self.sess.run([self.loss_total, self.update_total], feed_dict=feed_dict) return loss def valid(self, dataset=None): """ test on validation set """ if not dataset: dataset = self.valid_dataset data = dataset.next_batch(dataset.num_examples) x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} fetch = [self.loss_mse, self.pred, self.y_plh] [loss_mse, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict) loss_mse_np = np.mean(np.square(pred - gdth)) return loss_mse, pred, gdth def response(self, action, lr, mode='TRAIN'): """ Given an action, return the new state, reward and whether dead Args: action: one hot encoding of actions lr: when lr needs decay Returns: state: shape = [dim_state_rl] reward: shape = [1] dead: boolean """ if mode == 'TRAIN': dataset = self.train_dataset else: dataset = self.train_stud_dataset data = dataset.next_batch(self.config.batch_size) sess = self.sess x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} if action[0] == 1: # ----Update mse loss.---- sess.run(self.update_mse, feed_dict=feed_dict) elif action[1] == 1: # ----Update l1 loss.---- sess.run(self.update_l1, feed_dict=feed_dict) fetch = [self.loss_mse, self.loss_l1, self.mse_grad, self.l1_grad] loss_mse, loss_l1, mse_grad, l1_grad = sess.run(fetch, feed_dict=feed_dict) valid_loss, _, _ = self.valid() train_loss, _, _ = self.valid(dataset=dataset) # ----Update state.---- self.previous_mse_loss = self.previous_mse_loss[1:] + [ loss_mse.tolist() ] self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()] self.previous_action = action.tolist() self.step_number[0] += 1 self.previous_valid_loss = self.previous_valid_loss[1:]\ + [valid_loss.tolist()] self.previous_train_loss = self.previous_train_loss[1:]\ + [train_loss.tolist()] self.mag_mse_grad = self.get_grads_magnitude(mse_grad) self.mag_l1_grad = self.get_grads_magnitude(l1_grad) reward = self.get_step_reward() # ----Early stop and record best result.---- dead = self.check_terminate() state = self.get_state() return state, reward, dead def check_terminate(self): # TODO(haowen) # Episode terminates on two condition: # 1) Convergence: valid loss 
doesn't improve in endurance steps # 2) Collapse: action space collapse to one action (not implement yet) step = self.step_number[0] if step % self.config.valid_frequency_stud == 0: self.endurance += 1 loss, _, _ = self.valid() if loss < self.best_loss: self.best_step = self.step_number[0] self.best_loss = loss self.endurance = 0 if step > self.config.max_training_step: return True if self.config.stop_strategy_stud == 'exceeding_endurance' and \ self.endurance > self.config.max_endurance_stud: return True elif self.config.stop_strategy_stud == 'prescribed_conv_target': if self.best_loss < self.config.conv_target_stud: return True return False def get_step_reward(self): # TODO(haowen) Use the decrease of validation loss as step reward if self.improve_baseline is None: # ----First step, nothing to compare with.---- improve = 0.1 else: improve = (self.previous_valid_loss[-2] - self.previous_valid_loss[-1]) # TODO(haowen) Try to use sqrt function instead of sign function if self.improve_baseline is None: self.improve_baseline = improve decay = self.config.reward_baseline_decay self.improve_baseline = decay * self.improve_baseline\ + (1 - decay) * improve #TODO(haowen) Remove nonlinearity value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5)) #value = abs(improve) / (abs(self.improve_baseline) + 1e-5) value = min(value, self.config.reward_max_value) return math.copysign(value, improve) * self.config.reward_step_rl def get_final_reward(self): assert self.best_loss < 1e10 - 1 loss_mse = self.best_loss reward = self.config.reward_c / loss_mse if self.reward_baseline is None: self.reward_baseline = reward logger.info('reward: {}'.format(reward)) logger.info('reward_baseline: {}'.format(self.reward_baseline)) decay = self.config.reward_baseline_decay adv = reward - self.reward_baseline adv = min(adv, self.config.reward_max_value) adv = max(adv, -self.config.reward_max_value) # TODO(haowen) Try to use maximum instead of shift average as baseline # Result: doesn't seem to help too much # ----Shift average---- self.reward_baseline = decay * self.reward_baseline\ + (1 - decay) * reward # ----Maximun---- #if self.reward_baseline < reward: # self.reward_baseline = reward return reward, adv def get_state(self): abs_diff = [] rel_diff = [] if self.improve_baseline is None: ib = 1 else: ib = self.improve_baseline for v, t in zip(self.previous_valid_loss, self.previous_train_loss): abs_diff.append(v - t) if t > 1e-6: rel_diff.append((v - t) / t) else: rel_diff.append(0) state = ([rel_diff[-1] * 10] + _normalize1([abs(ib)]) + _normalize2(self.previous_mse_loss[-1:]) + _normalize3(self.previous_l1_loss[-1:]) + [self.mag_mse_grad - self.mag_l1_grad]) return np.array(state, dtype='f')
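# Illustrative sketch (standalone; the constants are placeholders, not the
# config's real values): get_final_reward() above turns the best MSE of an
# episode into a scalar reward, reward = c / best_mse, and converts it into
# an advantage by subtracting an exponential-moving-average baseline over
# past episodes and clipping to [-r_max, r_max]; the baseline is shifted
# toward the new reward after the advantage is computed.
def final_reward(best_mse, baseline, c=10.0, decay=0.9, r_max=20.0):
    reward = c / best_mse
    if baseline is None:
        baseline = reward
    adv = max(min(reward - baseline, r_max), -r_max)
    baseline = decay * baseline + (1 - decay) * reward   # shift average
    return reward, adv, baseline

r1, adv1, b = final_reward(best_mse=2.0, baseline=None)
r2, adv2, b = final_reward(best_mse=1.5, baseline=b)
print(r1, adv1, r2, adv2)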
class Reg(Basic_model):
    def __init__(self, config, sess, exp_name='new_exp', loss_mode='1'):
        super(Reg, self).__init__(config, sess, exp_name)
        # ----Loss_mode is only for DEBUG usage.----
        # 0: only mse, 1: mse & l1
        self.update_steps = 0
        self.loss_mode = loss_mode
        self.exp_name = exp_name
        self._load_dataset()
        with tf.variable_scope(exp_name):
            self._build_placeholder()
            self._build_graph()
        self.reset()
        self.reward_baseline = None
        self.improve_baseline = None

    def reset(self):
        """ Reset the model """
        # TODO(haowen) The way to carry step number information should be
        # reconsidered
        self.update_steps = 0
        self.previous_action = [0, 0]
        num_pre = self.config.task.num_pre_loss
        self.previous_mse_loss = deque(maxlen=num_pre)
        self.previous_l1_loss = deque(maxlen=num_pre)
        self.previous_valid_loss = deque(maxlen=num_pre)
        self.previous_train_loss = deque(maxlen=num_pre)
        self.improve_history = deque(maxlen=20)
        # to control when to terminate the episode
        self.endurance = 0
        self.best_loss = 1e10
        self.improve_baseline = None

    def _load_dataset(self):
        conf = self.config.data
        train_c_data_file = conf.train_c_data_file
        valid_c_data_file = conf.valid_c_data_file
        train_t_data_file = conf.train_t_data_file
        valid_t_data_file = conf.valid_t_data_file
        test_data_file = conf.test_data_file
        self.train_c_dataset = Dataset()
        self.train_c_dataset.load_npy(train_c_data_file)
        self.valid_c_dataset = Dataset()
        self.valid_c_dataset.load_npy(valid_c_data_file)
        self.train_t_dataset = Dataset()
        self.train_t_dataset.load_npy(train_t_data_file)
        self.valid_t_dataset = Dataset()
        self.valid_t_dataset.load_npy(valid_t_data_file)
        self.test_dataset = Dataset()
        self.test_dataset.load_npy(test_data_file)

    def _build_placeholder(self):
        x_size = self.config.task.dim_input
        with tf.variable_scope('placeholder'):
            self.x_plh = tf.placeholder(shape=[None, x_size],
                                        dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.float32)

    def _build_graph(self):
        config = self.config.task
        h_size = config.dim_hidden
        y_size = config.dim_output
        lr = config.lr
        with tf.variable_scope('quadratic'):
            # ----quadratic equation----
            # ---first order---
            x_size = config.dim_input
            initial = tf.random_normal(shape=[x_size, 1], stddev=0.1, seed=1)
            w1 = tf.Variable(initial)
            sum1 = tf.matmul(self.x_plh, w1)

            # ---second order---
            initial = tf.random_normal(shape=[x_size, x_size], stddev=0.01,
                                       seed=1)
            w2 = tf.Variable(initial)
            xx = tf.matmul(tf.reshape(self.x_plh, [-1, x_size, 1]),
                           tf.reshape(self.x_plh, [-1, 1, x_size]))
            sum2 = tf.matmul(tf.reshape(xx, [-1, x_size * x_size]),
                             tf.reshape(w2, [x_size * x_size, 1]))
            # NOTE(Haowen): Divide by 10 is important here to ensure
            # convergence.
self.pred = sum1 + sum2 self.w1 = w1 self.w2 = w2 # define loss self.loss_mse = tf.reduce_mean( tf.square(tf.squeeze(self.pred) - self.y_plh)) tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.exp_name) l1_regularizer = tf.contrib.layers.l1_regularizer( scale=config.lambda1, scope=None) self.loss_l1 = tf.contrib.layers.apply_regularization( l1_regularizer, tvars) l2_regularizer = tf.contrib.layers.l2_regularizer( scale=config.lambda2, scope=None) self.loss_l2 = tf.contrib.layers.apply_regularization( l2_regularizer, tvars) if self.loss_mode == '0': self.loss_total = self.loss_mse print('mse loss') elif self.loss_mode == '1': self.loss_total = self.loss_mse + self.loss_l1 print('mse loss and l1 loss') print('lambda1:', config.lambda1) elif self.loss_mode == '2': self.loss_total = self.loss_mse + self.loss_l1 + self.loss_l2 print('mse loss, l1 loss and l2 loss') print('lambda1: {}, lambda2: {}'.format( config.lambda1, config.lambda2)) else: raise NotImplementedError # ----Define update operation.---- self.update_mse = tf.train.GradientDescentOptimizer(lr).\ minimize(self.loss_mse) self.update_l1 = tf.train.GradientDescentOptimizer(lr).\ minimize(self.loss_l1) self.update_l2 = tf.train.GradientDescentOptimizer(lr).\ minimize(self.loss_l2) self.update_total = tf.train.GradientDescentOptimizer(lr).\ minimize(self.loss_total) self.tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.exp_name) self.init = tf.global_variables_initializer(self.tvars) def update_total(self): data = self.train_t_dataset.next_batch(self.config.task.batch_size) x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} loss, _ = self.sess.run([self.loss_total, self.update_total], feed_dict=feed_dict) return loss def valid(self, dataset=None): """ test on given dataset """ if not dataset: dataset = self.valid_t_dataset data = dataset.next_batch(dataset.num_examples) x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} fetch = [self.loss_mse, self.pred, self.y_plh] loss_mse, pred, gdth = self.sess.run(fetch, feed_dict=feed_dict) return loss_mse, pred, gdth def response(self, action, mode='TRAIN'): """ Given an action, return the new state, reward and whether dead Args: action: one hot encoding of actions Returns: state: shape = [dim_state_rl] reward: shape = [1] dead: boolean """ if mode == 'TRAIN': dataset = self.train_c_dataset dataset_v = self.valid_c_dataset else: dataset = self.train_t_dataset dataset_v = self.valid_t_dataset data = dataset.next_batch(self.config.task.batch_size) sess = self.sess x = data['input'] y = data['target'] feed_dict = {self.x_plh: x, self.y_plh: y} fetch = [self.loss_mse, self.loss_l1] if action == 0: # ----Update mse loss.---- sess.run(self.update_mse, feed_dict=feed_dict) elif action == 1: # ----Update l1 loss.---- sess.run(self.update_l1, feed_dict=feed_dict) elif action == 2: # ----Update l2 loss.---- sess.run(self.update_l2, feed_dict=feed_dict) loss_mse, loss_l1 = sess.run(fetch, feed_dict=feed_dict) valid_loss, _, _ = self.valid(dataset=dataset_v) train_loss, _, _ = self.valid(dataset=dataset) # ----Update state.---- self.previous_mse_loss = self.previous_mse_loss[1:] + [ loss_mse.tolist() ] self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()] self.previous_action = action.tolist() self.update_steps += 1 self.previous_valid_loss = self.previous_valid_loss[1:]\ + [valid_loss.tolist()] self.previous_train_loss = self.previous_train_loss[1:]\ + [train_loss.tolist()] reward = 
self.get_step_reward() # ----Early stop and record best result.---- dead = self.check_terminate() state = self.get_state() return state, reward, dead def check_terminate(self): # TODO(haowen) # Episode terminates on two condition: # 1) Convergence: valid loss doesn't improve in endurance steps # 2) Collapse: action space collapse to one action (not implement yet) step = self.update_steps if step % self.config.task.valid_frequency == 0: self.endurance += 1 loss, _, _ = self.valid() if loss < self.best_loss: self.best_step = self.update_steps self.best_loss = loss self.endurance = 0 if self.endurance > self.config.task.max_endurance: return True return False def get_step_reward(self): # TODO(haowen) Use the decrease of validation loss as step reward if self.improve_baseline is None: # ----First step, nothing to compare with.---- improve = 0.1 else: improve = (self.previous_valid_loss[-2] - self.previous_valid_loss[-1]) self.improve_history.append(improve) self.improve_baseline = mean(self.improve_history) #TODO(haowen) Remove nonlinearity value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5)) #value = abs(improve) / (abs(self.improve_baseline) + 1e-5) value = min(value, self.config.meta.reward_max_value) return math.copysign(value, improve) def get_final_reward(self): assert self.best_loss < 1e10 - 1 loss_mse = self.best_loss reward = self.config.meta.reward_c / loss_mse if self.reward_baseline is None: self.reward_baseline = reward decay = self.config.meta.reward_baseline_decay adv = reward - self.reward_baseline adv = min(adv, self.config.meta.reward_max_value) adv = max(adv, -self.config.meta.reward_max_value) # TODO(haowen) Try to use maximum instead of shift average as baseline # Result: doesn't seem to help too much # ----Shift average---- self.reward_baseline = decay * self.reward_baseline\ + (1 - decay) * reward # ----Maximun---- #if self.reward_baseline < reward: # self.reward_baseline = reward return reward, adv def get_state(self): abs_diff = [] rel_diff = [] if self.improve_baseline is None: ib = 1 else: ib = self.improve_baseline for v, t in zip(self.previous_valid_loss, self.previous_train_loss): abs_diff.append(v - t) if t > 1e-6: rel_diff.append(v / t) else: rel_diff.append(1) state = ([math.log(rel_diff[-1])] + _normalize1([abs(ib)]) + _normalize2(self.previous_mse_loss[-1:]) + self.previous_l1_loss[-1:]) return np.array(state, dtype='f')
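# Illustrative sketch (assumes `mean` above refers to a plain arithmetic mean,
# e.g. statistics.mean): this variant of get_step_reward() replaces the
# exponential-moving-average baseline used elsewhere with a sliding window --
# a deque(maxlen=20) of recent improvements whose mean serves as the baseline.
import math
from collections import deque
from statistics import mean

improve_history = deque(maxlen=20)

def step_reward(improve, r_max=20.0):
    improve_history.append(improve)
    baseline = mean(improve_history)
    value = min(math.sqrt(abs(improve) / (abs(baseline) + 1e-5)), r_max)
    return math.copysign(value, improve)

print([round(step_reward(x), 3) for x in (0.05, 0.02, -0.01, 0.0)])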
class Gan_grid(Gan): def __init__(self, config, exp_name='new_exp_gan_grid'): self.config = config self.graph = tf.Graph() self.exp_name = exp_name gpu_options = tf.GPUOptions(allow_growth=True) configProto = tf.ConfigProto(gpu_options=gpu_options) self.sess = tf.InteractiveSession(config=configProto, graph=self.graph) # ----Loss_mode is only for DEBUG usage.---- self.train_dataset = Dataset() self.train_dataset.load_npy(config.train_data_file) self.valid_dataset = Dataset() self.valid_dataset.load_npy(config.valid_data_file) self._build_placeholder() self._build_graph() self.final_hq_baseline = None # average reward over episodes self.reset() self.fixed_noise_10000 = np.random.normal(size=(10000, config.dim_z))\ .astype('float32') def reset(self): # ----Reset the model.---- # TODO(haowen) The way to carry step number information should be # reconsiderd self.step_number = 0 self.ema_gen_cost = None self.ema_disc_cost_real = None self.ema_disc_cost_fake = None self.prst_gen_cost = None self.prst_disc_cost_real = None self.prst_disc_cost_fake = None self.hq = 0 self.entropy = 0 # to control when to terminate the episode self.endurance = 0 self.best_hq = 0 self.hq_baseline = 0 self.collapse = False self.previous_action = -1 self.same_action_count = 0 def _build_placeholder(self): with self.graph.as_default(): dim_x = self.config.dim_x dim_z = self.config.dim_z bs = self.config.batch_size self.real_data = tf.placeholder(tf.float32, shape=[None, dim_x], name='real_data') self.noise = tf.placeholder(tf.float32, shape=[None, dim_z], name='noise') self.is_training = tf.placeholder(tf.bool, name='is_training') def _build_graph(self): dim_x = self.config.dim_x dim_z = self.config.dim_z batch_size = self.config.batch_size lr = self.config.lr_stud beta1 = self.config.beta1 beta2 = self.config.beta2 with self.graph.as_default(): real_data = self.real_data fake_data = self.generator(self.noise) disc_real = self.discriminator(real_data) disc_fake = self.discriminator(fake_data, reuse=True) gen_cost = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( logits=disc_fake, labels=tf.ones_like(disc_fake))) disc_cost_fake = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( logits=disc_fake, labels=tf.zeros_like(disc_fake))) disc_cost_real = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits( logits=disc_real, labels=tf.ones_like(disc_real))) disc_cost = (disc_cost_fake + disc_cost_real) / 2. 
tvars = tf.trainable_variables() gen_tvars = [v for v in tvars if 'Generator' in v.name] disc_tvars = [v for v in tvars if 'Discriminator' in v.name] gen_grad = tf.gradients(gen_cost, gen_tvars) disc_grad = tf.gradients(disc_cost, disc_tvars) optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=beta1, beta2=beta2) gen_train_op = optimizer.apply_gradients(zip(gen_grad, gen_tvars)) disc_train_op = optimizer.apply_gradients( zip(disc_grad, disc_tvars)) self.saver = tf.train.Saver() self.init = tf.global_variables_initializer() self.fake_data = fake_data self.gen_train_op = gen_train_op self.disc_train_op = disc_train_op self.update = [gen_train_op, disc_train_op] self.gen_cost = gen_cost self.gen_grad = gen_grad self.gen_tvars = gen_tvars self.disc_cost_fake = disc_cost_fake self.disc_cost_real = disc_cost_real self.disc_grad = disc_grad self.disc_tvars = disc_tvars self.disc_real = disc_real self.disc_fake = disc_fake def generator(self, input): dim_x = self.config.dim_x n_hidden = self.config.n_hidden_gen with tf.variable_scope('Generator'): output = layers.linear(input, n_hidden, name='LN1', stdev=0.2) output = layers.batchnorm(output, is_training=self.is_training, name='BN1') output = tf.nn.relu(output) output = layers.linear(output, n_hidden, name='LN2', stdev=0.2) output = layers.batchnorm(output, is_training=self.is_training, name='BN2') output = tf.nn.relu(output) output = layers.linear(output, dim_x, name='LN3', stdev=0.2) #output = slim.fully_connected(input, n_hidden, # activation_fn=tf.nn.relu) #output = slim.fully_connected(output, n_hidden, # activation_fn=tf.nn.relu) #output = slim.fully_connected(output, dim_x, activation_fn=None) return output def discriminator(self, input, reuse=False): n_hidden = self.config.n_hidden_disc with tf.variable_scope('Discriminator') as scope: if reuse: scope.reuse_variables() output = layers.linear(input, n_hidden, name='LN1', stdev=0.2) output = tf.nn.relu(output) output = layers.linear(output, 1, name='LN2', stdev=0.2) #output = slim.fully_connected(input, n_hidden, # activation_fn=tf.nn.relu) #output = slim.fully_connected(output, 1, activation_fn=None) return tf.reshape(output, [-1]) def train(self, save_model=False): sess = self.sess config = self.config batch_size = config.batch_size dim_z = config.dim_z valid_frequency = config.valid_frequency_stud print_frequency = config.print_frequency_stud best_hq = 0 hq_baseline = 0 best_entropy = 0 endurance = 0 decay = config.metric_decay steps_per_iteration = config.disc_iters + config.gen_iters for step in range(0, config.max_training_step, steps_per_iteration): # ----Update D network.---- for i in range(config.disc_iters): data = self.train_dataset.next_batch(batch_size) x = data['input'] z = np.random.normal(size=[batch_size, dim_z]).astype( np.float32) feed_dict = { self.noise: z, self.real_data: x, self.is_training: True } sess.run(self.disc_train_op, feed_dict=feed_dict) # ----Update G network.---- for i in range(config.gen_iters): z = np.random.normal(size=[batch_size, dim_z]).astype( np.float32) feed_dict = {self.noise: z, self.is_training: True} sess.run(self.gen_train_op, feed_dict=feed_dict) if step % valid_frequency == 0: logger.info('========Step{}========'.format(step)) logger.info(endurance) metrics = self.get_metrics_5x5(num_batches=100) logger.info(metrics) hq = metrics[0] if hq_baseline > 0: hq_baseline = hq_baseline * decay + hq * (1 - decay) else: hq_baseline = hq logger.info('hq_baseline: {}'.format(hq_baseline)) self.generate_plot(step) endurance += 1 if hq_baseline > 
best_hq: best_hq = hq_baseline endurance = 0 if save_model: self.save_model(step) if step % print_frequency == 0: data = self.train_dataset.next_batch(batch_size) x = data['input'] z = np.random.normal(size=[batch_size, dim_z]).astype( np.float32) feed_dict = { self.noise: z, self.real_data: x, self.is_training: False } fetch = [ self.gen_cost, self.disc_cost_fake, self.disc_cost_real ] r = sess.run(fetch, feed_dict=feed_dict) logger.info('gen_cost: {}'.format(r[0])) logger.info('disc_cost fake: {}, real: {}'.format(r[1], r[2])) if endurance > config.max_endurance_stud: break logger.info('best_hq: {}'.format(best_hq)) def get_state(self): if self.step_number == 0: state = [0] * self.config.dim_state_rl else: state = [ self.step_number / self.config.max_training_step, math.log(self.mag_disc_grad / self.mag_gen_grad), self.ema_gen_cost, (self.ema_disc_cost_real + self.ema_disc_cost_fake) / 2, self.hq, self.entropy / 3.2 ] return np.array(state, dtype='f') def check_terminate(self): # TODO(haowen) # Early stop and recording the best result # Episode terminates on two condition: # 1) Convergence: inception score doesn't improve in endurance steps # 2) Collapse: action space collapse to one action (not implement yet) if self.same_action_count > 500: logger.info('Terminate reason: Collapse') self.collapse = True return True step = self.step_number if step % self.config.valid_frequency_stud == 0: self.endurance += 1 metrics = self.get_metrics_5x5(100) hq = metrics[0] entropy = metrics[1] self.hq = hq self.entropy = entropy decay = self.config.metric_decay if self.hq_baseline > 0: self.hq_baseline = self.hq_baseline * decay + hq * (1 - decay) else: self.hq_baseline = hq if self.hq_baseline > self.best_hq: logger.info('step: {}, new best result: {}'.\ format(step, self.hq_baseline)) self.best_step = self.step_number self.best_hq = self.hq_baseline self.best_entropy = entropy self.endurance = 0 self.save_model(step) if step % self.config.print_frequency_stud == 0: logger.info('----step{}----'.format(step)) logger.info('hq: {}, entropy: {}'.format(hq, entropy)) logger.info('hq_baseline: {}'.format(self.hq_baseline)) if step > self.config.max_training_step: return True if self.config.stop_strategy_stud == 'prescribed_steps': pass elif self.config.stop_strategy_stud == 'exceeding_endurance' and \ self.endurance > self.config.max_endurance_stud: return True return False def get_step_reward(self): return 0 def get_final_reward(self): if self.collapse: return 0, -self.config.reward_max_value if self.best_entropy < 2.5: # lose mode, fail trail logger.info('lose mode with entropy: {}'.format(self.best_entropy)) return 0, -self.config.reward_max_value hq = self.best_hq reward = self.config.reward_c * hq**2 if self.final_hq_baseline is None: self.final_hq_baseline = hq baseline_hq = self.final_hq_baseline baseline_reward = self.config.reward_c * baseline_hq**2 decay = self.config.inps_baseline_decay adv = reward - baseline_reward adv = min(adv, self.config.reward_max_value) adv = max(adv, -self.config.reward_max_value) # ----Shift average---- self.final_hq_baseline = decay * self.final_hq_baseline\ + (1 - decay) * hq return reward, adv def get_metrics_5x5(self, num_batches=100): all_samples = [] config = self.config batch_size = 100 dim_z = config.dim_z for i in range(num_batches): z = np.random.normal(size=[batch_size, dim_z]).astype(np.float32) feed_dict = {self.noise: z, self.is_training: False} samples = self.sess.run(self.fake_data, feed_dict=feed_dict) all_samples.append(samples) all_samples = 
np.concatenate(all_samples, axis=0) centers = [] for i in range(5): for j in range(5): centers.append([i * 0.5 - 1, j * 0.5 - 1]) centers = np.array(centers) distance = np.zeros([batch_size * num_batches, 25]) for i in range(25): distance[:, i] = np.sqrt( np.square(all_samples[:, 0] - centers[i, 0]) + np.square(all_samples[:, 1] - centers[i, 1])) high_quality = distance < config.var_noise * 3 count_cluster = np.sum(high_quality, 0) hq = np.sum(count_cluster) / (num_batches * batch_size) p_cluster = count_cluster / np.sum(count_cluster) #print('hq:', np.sum(hq)) #print('count_cluster:', count_cluster) #print('p_cluster:', p_cluster) p_cluster += 1e-8 entropy = -np.sum(p_cluster * np.log(p_cluster)) #print('entropy:', entropy) return hq, entropy def get_metrics_2x2(self, num_batches=100): all_samples = [] config = self.config batch_size = 100 dim_z = config.dim_z for i in range(num_batches): z = np.random.normal(size=[batch_size, dim_z]).astype(np.float32) feed_dict = {self.noise: z, self.is_training: False} samples = self.sess.run(self.fake_data, feed_dict=feed_dict) all_samples.append(samples) all_samples = np.concatenate(all_samples, axis=0) centers = [] for i in range(2): for j in range(2): centers.append([i * 1.0 - 0.5, j * 1.0 - 0.5]) centers = np.array(centers) distance = np.zeros([batch_size * num_batches, 4]) for i in range(4): distance[:, i] = np.sqrt( np.square(all_samples[:, 0] - centers[i, 0]) + np.square(all_samples[:, 1] - centers[i, 1])) high_quality = distance < config.var_noise * 3 count_cluster = np.sum(high_quality, 0) hq = np.sum(count_cluster) / (num_batches * batch_size) p_cluster = count_cluster / np.sum(count_cluster) #print('hq:', np.sum(hq)) print('count_cluster:', count_cluster) #print('p_cluster:', p_cluster) p_cluster += 1e-8 entropy = -np.sum(p_cluster * np.log(p_cluster)) #print('entropy:', entropy) return hq, entropy def generate_plot(self, step): feed_dict = { self.noise: self.fixed_noise_10000, self.is_training: False } samples = self.sess.run(self.fake_data, feed_dict=feed_dict) task_dir = os.path.join(self.config.save_images_dir, self.exp_name) if not os.path.exists(task_dir): os.mkdir(task_dir) save_path = os.path.join(task_dir, 'images_{}.png'.format(step)) plt.scatter(samples[:, 0], samples[:, 1]) plt.show() plt.savefig(save_path, bbox_inches='tight') plt.close()
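# Illustrative sketch (standalone; var_noise=0.05 is an assumed value, not the
# repo's config): get_metrics_5x5() above measures mode coverage for the
# 25-Gaussian grid task. A sample counts as "high quality" if it falls within
# 3 standard deviations of one of the 25 grid centers; `hq` is the fraction of
# such samples and `entropy` is the entropy of their distribution over centers
# (ln(25) ~= 3.22 when all modes are covered evenly, matching the /3.2
# normalization in get_state()).
import numpy as np

def grid_metrics(samples, var_noise=0.05):
    centers = np.array([[i * 0.5 - 1, j * 0.5 - 1]
                        for i in range(5) for j in range(5)])
    dist = np.linalg.norm(samples[:, None, :] - centers[None, :, :], axis=2)
    high_quality = dist < var_noise * 3
    count_cluster = high_quality.sum(0)
    hq = count_cluster.sum() / float(len(samples))
    p = count_cluster / float(count_cluster.sum()) + 1e-8
    entropy = -np.sum(p * np.log(p))
    return hq, entropy

# Fake "generator" output: equal numbers of samples around every center.
rng = np.random.RandomState(0)
centers = np.array([[i * 0.5 - 1, j * 0.5 - 1]
                    for i in range(5) for j in range(5)])
fake = np.repeat(centers, 400, axis=0) + rng.normal(0, 0.05, (10000, 2))
print(grid_metrics(fake))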
class Cls(Basic_model):
    def __init__(self, config, exp_name='new_exp', loss_mode='1'):
        self.config = config
        self.graph = tf.Graph()
        gpu_options = tf.GPUOptions(allow_growth=True)
        configProto = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.InteractiveSession(config=configProto,
                                          graph=self.graph)
        self.exp_name = exp_name
        self.loss_mode = loss_mode
        train_data_file = config.train_data_file
        valid_data_file = config.valid_data_file
        test_data_file = config.test_data_file
        train_stud_data_file = config.train_stud_data_file
        self.train_dataset = Dataset()
        self.train_dataset.load_npy(train_data_file)
        self.valid_dataset = Dataset()
        self.valid_dataset.load_npy(valid_data_file)
        self.test_dataset = Dataset()
        self.test_dataset.load_npy(test_data_file)
        self.train_stud_dataset = Dataset()
        self.train_stud_dataset.load_npy(train_stud_data_file)
        self.reset()
        self._build_placeholder()
        self._build_graph()
        self.reward_baseline = None    # average reward over episodes
        self.improve_baseline = None   # average improvement over steps

    def reset(self):
        # ----Reset the model.----
        # TODO(haowen) The way to carry step number information should be
        # reconsidered.
        self.step_number = [0]
        self.previous_ce_loss = [0] * self.config.num_pre_loss
        self.previous_l1_loss = [0] * self.config.num_pre_loss
        self.previous_valid_acc = [0] * self.config.num_pre_loss
        self.previous_train_acc = [0] * self.config.num_pre_loss
        self.previous_action = [0] * self.config.dim_action_rl
        self.previous_valid_loss = [0] * self.config.num_pre_loss
        self.previous_train_loss = [0] * self.config.num_pre_loss
        self.task_dir = None

        # ----To control when to terminate the episode.----
        self.endurance = 0
        self.best_loss = 1e10
        self.best_acc = 0
        self.test_acc = 0
        self.improve_baseline = None

    def _build_placeholder(self):
        x_size = self.config.dim_input_stud
        with self.graph.as_default():
            self.x_plh = tf.placeholder(shape=[None, x_size],
                                        dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.int32)

    def _build_graph(self):
        x_size = self.config.dim_input_stud
        h_size = self.config.dim_hidden_stud
        y_size = self.config.dim_output_stud
        lr = self.config.lr_stud
        with self.graph.as_default():
            # ----3-layer ffn----
            #hidden1 = slim.fully_connected(self.x_plh, h_size,
            #                               activation_fn=tf.nn.tanh)
            #hidden2 = slim.fully_connected(hidden1, 32,
            #                               activation_fn=tf.nn.tanh)
            #self.pred = slim.fully_connected(hidden1, y_size,
            #                                 activation_fn=None)
            w1 = weight_variable([x_size, h_size], name='w1')
            b1 = bias_variable([h_size], name='b1')
            hidden = tf.nn.relu(tf.matmul(self.x_plh, w1) + b1)
            w2 = weight_variable([h_size, y_size], name='w2')
            b2 = bias_variable([y_size], name='b2')
            self.pred = tf.matmul(hidden, w2) + b2

            # ----Define losses.----
            self.loss_ce = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.y_plh, logits=self.pred, name='loss'))
            y_ = tf.argmax(self.pred, 1, output_type=tf.int32)
            correct_prediction = tf.equal(y_, self.y_plh)
            self.correct_prediction = correct_prediction
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

            tvars = tf.trainable_variables()
            self.tvars = tvars
            l1_regularizer = tf.contrib.layers.l1_regularizer(
                scale=self.config.lambda1_stud, scope=None)
            self.loss_l1 = tf.contrib.layers.apply_regularization(
                l1_regularizer, tvars)
            l2_regularizer = tf.contrib.layers.l2_regularizer(
                scale=self.config.lambda2_stud, scope=None)
            self.loss_l2 = tf.contrib.layers.apply_regularization(
                l2_regularizer, tvars)

            # ----loss_mode selects which terms enter the total loss.----
            if self.loss_mode == '0':
                self.loss_total = self.loss_ce
                print('ce loss')
            elif self.loss_mode == '1':
                self.loss_total = self.loss_ce + self.loss_l1
                print('ce loss and l1 loss')
                print('lambda1:', self.config.lambda1_stud)
            elif self.loss_mode == '2':
                self.loss_total = self.loss_ce + self.loss_l2
                print('ce loss and l2 loss')
                print('lambda2:', self.config.lambda2_stud)
            elif self.loss_mode == '3':
                self.loss_total = self.loss_ce + self.loss_l1 + self.loss_l2
                print('ce loss, l1 loss and l2 loss')
                print('lambda1:', self.config.lambda1_stud)
                print('lambda2:', self.config.lambda2_stud)
            else:
                raise NotImplementedError

            # ----Define update operations.----
            self.update_ce = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_ce)
            self.update_l1 = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_l1)
            self.update_l2 = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_l2)
            self.update_total = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_total)
            self.update = [self.update_ce, self.update_l1, self.update_l2]
            self.init = tf.global_variables_initializer()

    def train(self):
        # ----Optimize the total loss (baseline training).----
        data = self.train_dataset.next_batch(self.config.batch_size)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_ce, self.accuracy, self.update_total]
        loss, acc, _ = self.sess.run(fetch, feed_dict=feed_dict)
        return loss, acc

    def valid(self, dataset=None):
        # ----Evaluate on the validation set (or a given dataset).----
        if not dataset:
            dataset = self.valid_dataset
        data = dataset.next_batch(dataset.num_examples)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_ce, self.accuracy, self.pred, self.y_plh]
        [loss_ce, acc, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict)
        return loss_ce, acc, pred, gdth

    def response(self, action, lr, mode='TRAIN'):
        # Given an action, apply the corresponding update and return the new
        # state, the step reward and whether the episode is dead.
        # Args:
        #     action: one-hot encoding of the chosen update (ce / l1 / l2)
        # Returns:
        #     state: shape = [dim_state_rl]
        #     reward: shape = [1]
        #     dead: boolean
        sess = self.sess
        if mode == 'TRAIN':
            dataset = self.train_dataset
        else:
            # NOTE: both modes currently draw batches from train_dataset.
            dataset = self.train_dataset
        data = dataset.next_batch(self.config.batch_size)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        a = np.argmax(np.array(action))
        sess.run(self.update[a], feed_dict=feed_dict)
        fetch = [self.loss_ce, self.loss_l1]
        loss_ce, loss_l1 = sess.run(fetch, feed_dict=feed_dict)
        valid_loss, valid_acc, _, _ = self.valid()
        train_loss, train_acc, _, _ = self.valid(dataset=dataset)

        # ----Update state.----
        self.previous_ce_loss = self.previous_ce_loss[1:] + [loss_ce.tolist()]
        self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()]
        self.previous_action = action.tolist()
        self.step_number[0] += 1
        self.previous_valid_loss = self.previous_valid_loss[1:]\
            + [valid_loss.tolist()]
        self.previous_train_loss = self.previous_train_loss[1:]\
            + [train_loss.tolist()]
        self.previous_valid_acc = self.previous_valid_acc[1:]\
            + [valid_acc.tolist()]
        self.previous_train_acc = self.previous_train_acc[1:]\
            + [train_acc.tolist()]

        reward = self.get_step_reward()
        # ----Early stop and record the best result.----
        dead = self.check_terminate()
        state = self.get_state()
        return state, reward, dead

    def check_terminate(self):
        # TODO(haowen)
        # Early stop and record the best result.
        # The episode terminates on two conditions:
        # 1) Convergence: valid loss doesn't improve within
        #    `max_endurance_stud` validations.
        # 2) Collapse: the action space collapses to one action (not
        #    implemented yet).
        step = self.step_number[0]
        if step % self.config.valid_frequency_stud == 0:
            self.endurance += 1
            loss, acc, _, _ = self.valid()
            if acc > self.best_acc:
                self.best_step = self.step_number[0]
                self.best_loss = loss
                self.best_acc = acc
                self.endurance = 0
                _, test_acc, _, _ = self.valid(dataset=self.test_dataset)
                self.test_acc = test_acc
        if self.endurance > self.config.max_endurance_stud:
            return True
        return False

    def get_step_reward(self):
        # TODO(haowen) Use the decrease of validation loss as the step reward.
        if self.improve_baseline is None:
            # ----First step, nothing to compare with.----
            improve = 0.1
        else:
            improve = (self.previous_valid_loss[-2]
                       - self.previous_valid_loss[-1])

        # TODO(haowen) Try to use sqrt function instead of sign function
        # ----With baseline.----
        if self.improve_baseline is None:
            self.improve_baseline = improve
        decay = self.config.reward_baseline_decay
        self.improve_baseline = decay * self.improve_baseline\
            + (1 - decay) * improve

        # TODO(haowen) Remove nonlinearity
        value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5))
        #value = abs(improve) / (abs(self.improve_baseline) + 1e-5)
        value = min(value, self.config.reward_max_value)
        return math.copysign(value, improve) * self.config.reward_step_rl

        # ----Without baseline.----
        #return math.copysign(math.sqrt(abs(improve)), improve)

        # TODO(haowen) This design of reward may cause imbalance because
        # positive improvements occur more often than negative ones.
        #if abs(improve) < 1e-5:
        #    return 0    # no reward if the difference is too small
        #elif improve > 0:
        #    # TODO(haowen) Try not to give reward to the reduction of loss.
        #    # This reward will strengthen penalty and weaken reward.
        #    return self.config.reward_step_rl
        #    #return 0
        #else:
        #    return -self.config.reward_step_rl

    def get_final_reward(self):
        # ----Episode-level reward: higher best accuracy gives a higher
        # (less negative) reward.----
        acc = max(self.best_acc, 1 / self.config.dim_output_stud)
        reward = -self.config.reward_c / acc
        if self.reward_baseline is None:
            self.reward_baseline = reward
        decay = self.config.reward_baseline_decay
        adv = reward - self.reward_baseline
        adv = min(adv, self.config.reward_max_value)
        adv = max(adv, -self.config.reward_max_value)
        # ----Shift average.----
        self.reward_baseline = decay * self.reward_baseline\
            + (1 - decay) * reward
        return reward, adv

    def get_state(self):
        abs_diff = []
        rel_diff = []
        if self.improve_baseline is None:
            ib = 1
        else:
            ib = self.improve_baseline

        for v, t in zip(self.previous_valid_loss, self.previous_train_loss):
            abs_diff.append(v - t)
            if t > 1e-6:
                rel_diff.append(v / t)
            else:
                rel_diff.append(1)

        state = (rel_diff[-1:] +
                 _normalize1([abs(ib)]) +
                 _normalize2(self.previous_ce_loss[-1:]) +
                 _normalize2(self.previous_l1_loss[-1:]) +
                 self.previous_train_acc[-1:] +
                 [self.previous_train_acc[-1] - self.previous_valid_acc[-1]] +
                 self.previous_valid_acc[-1:])
        return np.array(state, dtype='f')
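
# ----Illustrative sketch, not part of the original model.----
# How a controller is expected to drive the Cls task model: at each step it
# chooses one of the three update operations (ce / l1 / l2), passes it to
# `response` as a one-hot action, and receives the next state, the step
# reward and a termination flag; the episode-level reward comes from
# `get_final_reward` once the episode is dead.  The function name and the
# random policy below are placeholders invented for this sketch; a real
# controller would sample the action from its policy network instead.
def _random_rollout_demo(model, config):
    import numpy as np
    model.reset()
    dead = False
    while not dead:
        action = np.zeros(config.dim_action_rl, dtype='f')
        action[np.random.randint(config.dim_action_rl)] = 1   # random policy
        state, reward, dead = model.response(action, config.lr_stud)
    # Episode-level reward and advantage used to update the controller.
    final_reward, adv = model.get_final_reward()
    return final_reward, adv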