Example #1
File: cls.py  Project: safpla/autoLoss
 def __init__(self, config, exp_name='new_exp', loss_mode='1'):
     self.config = config
     self.graph = tf.Graph()
     gpu_options = tf.GPUOptions(allow_growth=True)
     configProto = tf.ConfigProto(gpu_options=gpu_options)
     self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
     self.exp_name = exp_name
     self.loss_mode = loss_mode
     train_data_file = config.train_data_file
     valid_data_file = config.valid_data_file
     test_data_file = config.test_data_file
     train_stud_data_file = config.train_stud_data_file
     self.train_dataset = Dataset()
     self.train_dataset.load_npy(train_data_file)
     self.valid_dataset = Dataset()
     self.valid_dataset.load_npy(valid_data_file)
     self.test_dataset = Dataset()
     self.test_dataset.load_npy(test_data_file)
     self.train_stud_dataset = Dataset()
     self.train_stud_dataset.load_npy(train_stud_data_file)
     self.reset()
     self._build_placeholder()
     self._build_graph()
     self.reward_baseline = None  # average reward over episodes
     self.improve_baseline = None  # average improvement over steps
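
Every example in this listing leans on the project's Dataset helper, whose implementation is not shown. Judging only from how it is called here (load_npy, next_batch returning a dict with 'input' and 'target' keys, num_examples, build_from_data), a minimal stand-in could look like the sketch below. This is an assumption for illustration, not the real safpla/autoLoss class, and it omits the load_json and resize methods used in Example #10.

import numpy as np

class MinimalDataset:
    """Illustrative stand-in for the project's Dataset class (assumed API)."""

    def __init__(self):
        self._inputs = None
        self._targets = None
        self._cursor = 0

    def build_from_data(self, inputs, targets):
        self._inputs = np.asarray(inputs)
        self._targets = np.asarray(targets)

    def load_npy(self, data_file):
        # Assumption: the .npy file stores a pickled dict with 'input' and 'target' arrays.
        data = np.load(data_file, allow_pickle=True).item()
        self.build_from_data(data['input'], data['target'])

    @property
    def num_examples(self):
        return 0 if self._inputs is None else self._inputs.shape[0]

    def next_batch(self, batch_size):
        # Wrap around instead of shuffling, to keep the sketch short.
        idx = np.arange(self._cursor, self._cursor + batch_size) % self.num_examples
        self._cursor = (self._cursor + batch_size) % self.num_examples
        return {'input': self._inputs[idx], 'target': self._targets[idx]}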
Example #2
 def _load_dataset(self):
     conf = self.config.data
     train_c_data_file = conf.train_c_data_file
     valid_c_data_file = conf.valid_c_data_file
     train_t_data_file = conf.train_t_data_file
     valid_t_data_file = conf.valid_t_data_file
     test_data_file = conf.test_data_file
     self.train_c_dataset = Dataset()
     self.train_c_dataset.load_npy(train_c_data_file)
     self.valid_c_dataset = Dataset()
     self.valid_c_dataset.load_npy(valid_c_data_file)
     self.train_t_dataset = Dataset()
     self.train_t_dataset.load_npy(train_t_data_file)
     self.valid_t_dataset = Dataset()
     self.valid_t_dataset.load_npy(valid_t_data_file)
     self.test_dataset = Dataset()
     self.test_dataset.load_npy(test_data_file)
Example #3
 def __init__(self, config, exp_name='new_exp_gan_grid'):
     self.config = config
     self.graph = tf.Graph()
     self.exp_name = exp_name
     gpu_options = tf.GPUOptions(allow_growth=True)
     configProto = tf.ConfigProto(gpu_options=gpu_options)
     self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
     # ----Loss_mode is only for DEBUG usage.----
     self.train_dataset = Dataset()
     self.train_dataset.load_npy(config.train_data_file)
     self.valid_dataset = Dataset()
     self.valid_dataset.load_npy(config.valid_data_file)
     self._build_placeholder()
     self._build_graph()
     self.final_hq_baseline = None  # average reward over episodes
     self.reset()
     self.fixed_noise_10000 = np.random.normal(size=(10000, config.dim_z))\
         .astype('float32')
Example #4
 def _load_datasets(self):
     config = self.config
     train_ctrl_data_file = os.path.join(config.data_dir,
                                         config.train_ctrl_data_file)
     train_task_data_file = os.path.join(config.data_dir,
                                         config.train_task_data_file)
     valid_ctrl_data_file = os.path.join(config.data_dir,
                                         config.valid_ctrl_data_file)
     valid_task_data_file = os.path.join(config.data_dir,
                                         config.valid_task_data_file)
     test_data_file = os.path.join(config.data_dir, config.test_data_file)
     self.train_ctrl_dataset = Dataset()
     self.train_ctrl_dataset.load_npy(train_ctrl_data_file)
     self.valid_ctrl_dataset = Dataset()
     self.valid_ctrl_dataset.load_npy(valid_ctrl_data_file)
     self.train_task_dataset = Dataset()
     self.train_task_dataset.load_npy(train_task_data_file)
     self.valid_task_dataset = Dataset()
     self.valid_task_dataset.load_npy(valid_task_data_file)
     self.test_dataset = Dataset()
     self.test_dataset.load_npy(test_data_file)
Example #5
def get_inception_score(mnist_model, images, splits=10):
    bs = 100
    preds = []
    total_samples = images.shape[0]
    n_batches = int(math.ceil(float(total_samples) / float(bs)))
    for i in range(n_batches):
        input = images[(i * bs):min((i + 1) * bs, total_samples)]
        target = np.zeros([input.shape[0], 10])
        dataset = Dataset()
        dataset.build_from_data(input, target)
        fetch = [mnist_model.softmax]
        pred = mnist_model.valid(dataset, fetch)
        preds.append(pred[0])
    preds = np.concatenate(preds, 0)
    scores = []
    for i in range(splits):
        part = preds[(i * preds.shape[0] //
                      splits):((i + 1) * preds.shape[0] // splits), :]
        kl = part * (np.log(part) -
                     np.log(np.expand_dims(np.mean(part, 0), 0)))
        kl = np.mean(np.sum(kl, 1))
        scores.append(np.exp(kl))
    return np.mean(scores), np.std(scores)
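
The score computed above is exp(E[KL(p(y|x) || p(y))]), averaged over splits, where p(y|x) are the classifier's softmax outputs and p(y) is their mean within a split. A self-contained NumPy sketch of the same arithmetic, operating on an already-computed prediction matrix (random probabilities here, purely for illustration):

import numpy as np

def inception_score_from_preds(preds, splits=10, eps=1e-16):
    # preds: (N, num_classes) rows of class probabilities, e.g. softmax outputs.
    scores = []
    n = preds.shape[0]
    for i in range(splits):
        part = preds[i * n // splits:(i + 1) * n // splits]
        p_y = np.mean(part, axis=0, keepdims=True)            # marginal p(y) within the split
        kl = part * (np.log(part + eps) - np.log(p_y + eps))  # KL(p(y|x) || p(y)) per sample
        scores.append(np.exp(np.mean(np.sum(kl, axis=1))))
    return np.mean(scores), np.std(scores)

rng = np.random.default_rng(0)
fake_preds = rng.dirichlet(np.ones(10), size=1000)  # random softmax-like rows, illustration only
print(inception_score_from_preds(fake_preds))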
Example #6
File: reg.py  Project: safpla/autoLoss
 def __init__(self, config, exp_name='new_exp', loss_mode='1'):
     self.config = config
     self.graph = tf.Graph()
     gpu_options = tf.GPUOptions(allow_growth=True)
     configProto = tf.ConfigProto(gpu_options=gpu_options)
     self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
     # ----Loss_mode is only for DEBUG usage.----
     #   0: only mse, 1: mse & l1
     self.loss_mode = loss_mode
     self.exp_name = exp_name
     train_data_file = config.train_data_file
     train_stud_data_file = config.train_stud_data_file
     valid_data_file = config.valid_data_file
     self.train_dataset = Dataset()
     self.train_dataset.load_npy(train_data_file)
     self.valid_dataset = Dataset()
     self.valid_dataset.load_npy(valid_data_file)
     self.train_stud_dataset = Dataset()
     self.train_stud_dataset.load_npy(train_stud_data_file)
     self.reset()
     self._build_placeholder()
     self._build_graph()
     self.reward_baseline = None
     self.improve_baseline = None
Example #7
class Cls(Basic_model):
    '''
    Public variables (all task models should have these public variables):
        self.extra_info
        self.checkpoint_dir
        self.best_performance
        self.test_dataset
    '''
    def __init__(self, config, exp_name='new_exp'):
        super(Cls, self).__init__(config, exp_name)
        self.reset()
        self._load_datasets()
        self._build_placeholder()
        self._build_graph()
        self.reward_baseline = None  # average reward over episodes
        self.improve_baseline = None  # average improvement over steps

    def _load_datasets(self):
        config = self.config
        train_ctrl_data_file = os.path.join(config.data_dir,
                                            config.train_ctrl_data_file)
        train_task_data_file = os.path.join(config.data_dir,
                                            config.train_task_data_file)
        valid_ctrl_data_file = os.path.join(config.data_dir,
                                            config.valid_ctrl_data_file)
        valid_task_data_file = os.path.join(config.data_dir,
                                            config.valid_task_data_file)
        test_data_file = os.path.join(config.data_dir, config.test_data_file)
        self.train_ctrl_dataset = Dataset()
        self.train_ctrl_dataset.load_npy(train_ctrl_data_file)
        self.valid_ctrl_dataset = Dataset()
        self.valid_ctrl_dataset.load_npy(valid_ctrl_data_file)
        self.train_task_dataset = Dataset()
        self.train_task_dataset.load_npy(train_task_data_file)
        self.valid_task_dataset = Dataset()
        self.valid_task_dataset.load_npy(valid_task_data_file)
        self.test_dataset = Dataset()
        self.test_dataset.load_npy(test_data_file)

    def reset(self):
        # ----Reset the model.----
        # TODO(haowen) The way to carry step number information should be
        # reconsidered
        self.step_number = [0]
        self.previous_ce_loss = [0] * self.config.num_pre_loss
        self.previous_l1_loss = [0] * self.config.num_pre_loss
        self.previous_valid_acc = [0] * self.config.num_pre_loss
        self.previous_train_acc = [0] * self.config.num_pre_loss
        self.previous_valid_loss = [0] * self.config.num_pre_loss
        self.previous_train_loss = [0] * self.config.num_pre_loss
        self.mag_ce_grad = 0
        self.mag_l1_grad = 0

        # to control when to terminate the episode
        self.endurance = 0
        self.collapse = False
        # The bigger the performance is, the better. In this case, performance
        # is accuracy. Naming it as performance in order to be compatible with
        # other tasks.
        self.best_performance = -1e10
        self.improve_baseline = None

    def _build_placeholder(self):
        x_size = self.config.dim_input_task
        with self.graph.as_default():
            self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.int32)

    def _build_graph(self):
        x_size = self.config.dim_input_task
        h_size = self.config.dim_hidden_task
        y_size = self.config.dim_output_task
        lr = self.config.lr_task

        with self.graph.as_default():
            w1 = weight_variable([x_size, h_size], name='w1')
            b1 = bias_variable([h_size], name='b1')
            hidden = tf.nn.relu(tf.matmul(self.x_plh, w1) + b1)

            w2 = weight_variable([h_size, y_size], name='w2')
            b2 = bias_variable([y_size], name='b2')
            self.pred = tf.matmul(hidden, w2) + b2

            # define loss
            # cross entropy loss
            self.loss_ce = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.y_plh, logits=self.pred, name='loss'))
            y_ = tf.argmax(self.pred, 1, output_type=tf.int32)
            correct_prediction = tf.equal(y_, self.y_plh)
            self.correct_prediction = correct_prediction
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

            tvars = tf.trainable_variables()
            self.tvars = tvars
            l1_regularizer = tf.contrib.layers.l1_regularizer(
                scale=self.config.lambda_task, scope=None)
            self.loss_l1 = tf.contrib.layers.apply_regularization(
                l1_regularizer, tvars)
            self.loss_total = self.loss_ce + self.loss_l1

            # ----Define update operation.----
            optimizer = tf.train.AdamOptimizer(lr)
            ce_gvs = optimizer.compute_gradients(self.loss_ce, tvars)
            l1_gvs = optimizer.compute_gradients(self.loss_l1, tvars)
            total_gvs = optimizer.compute_gradients(self.loss_total, tvars)
            self.update_ce = optimizer.apply_gradients(ce_gvs)
            self.update_l1 = optimizer.apply_gradients(l1_gvs)
            self.update_total = optimizer.apply_gradients(total_gvs)
            self.update = [self.update_ce, self.update_l1]

            self.init = tf.global_variables_initializer()
            self.saver = tf.train.Saver()
            self.ce_grad = [grad for grad, _ in ce_gvs]
            self.l1_grad = [grad for grad, _ in l1_gvs]

    def valid(self, dataset=None):
        if not dataset:
            if self.config.args.task_mode == 'train':
                dataset = self.valid_ctrl_dataset
            elif self.config.args.task_mode == 'test':
                dataset = self.valid_task_dataset
            elif self.config.args.task_mode == 'baseline':
                dataset = self.valid_task_dataset
            else:
                logger.exception('Unexpected task_mode: {}'.\
                                 format(self.config.args.task_mode))
        data = dataset.next_batch(dataset.num_examples)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_ce, self.accuracy, self.pred, self.y_plh]
        [loss_ce, acc, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict)
        return loss_ce, acc, pred, gdth

    def response(self, action):
        """ Given an action, return the new state, reward and whether dead

        Args:
            action: one hot encoding of actions

        Returns:
            state: shape = [dim_input_ctrl]
            reward: shape = [1]
            dead: boolean
        """
        if self.config.args.task_mode == 'train':
            dataset = self.train_ctrl_dataset
        elif self.config.args.task_mode == 'test':
            dataset = self.train_task_dataset
        elif self.config.args.task_mode == 'baseline':
            dataset = self.train_task_dataset
        else:
            logger.exception('Unexpected mode: {}'.\
                             format(self.config.args.task_mode))
        data = dataset.next_batch(self.config.batch_size)

        sess = self.sess
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}

        a = np.argmax(np.array(action))
        sess.run(self.update[a], feed_dict=feed_dict)

        fetch = [
            self.loss_ce, self.loss_l1, self.accuracy, self.ce_grad,
            self.l1_grad
        ]
        loss_ce, loss_l1, acc, ce_grad, l1_grad = sess.run(fetch,
                                                           feed_dict=feed_dict)
        valid_loss, valid_acc, _, _ = self.valid()
        train_loss, train_acc = loss_ce, acc

        # ----Update state.----
        self.previous_ce_loss = self.previous_ce_loss[1:] + [loss_ce.tolist()]
        self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()]
        self.step_number[0] += 1
        self.previous_valid_loss = self.previous_valid_loss[1:]\
            + [valid_loss.tolist()]
        self.previous_train_loss = self.previous_train_loss[1:]\
            + [train_loss.tolist()]
        self.previous_valid_acc = self.previous_valid_acc[1:]\
            + [valid_acc.tolist()]
        self.previous_train_acc = self.previous_train_acc[1:]\
            + [train_acc.tolist()]
        self.mag_ce_grad = self.get_grads_magnitude(ce_grad)
        self.mag_l1_grad = self.get_grads_magnitude(l1_grad)

        # Public variable
        self.extra_info = {
            'valid_loss': valid_loss,
            'train_loss': train_loss,
            'valid_acc': valid_acc,
            'train_acc': train_acc
        }

        reward = self.get_step_reward()
        # ----Early stop and record best result.----
        dead = self.check_terminate()
        state = self.get_state()
        return state, reward, dead

    def check_terminate(self):
        # TODO(haowen)
        # Early stop and record the best result.
        # An episode terminates on two conditions:
        # 1) Convergence: valid loss doesn't improve within `endurance` steps
        # 2) Collapse: the action space collapses to one action (not implemented yet)
        step = self.step_number[0]
        if step % self.config.valid_frequency_task == 0:
            self.endurance += 1
            loss, acc, _, _ = self.valid()
            if acc > self.best_performance:
                self.best_step = self.step_number[0]
                self.best_performance = acc
                self.endurance = 0
                if not self.config.args.task_mode == 'train':
                    self.save_model(step, mute=True)

        if step > self.config.max_training_step:
            return True
        if self.config.stop_strategy_task == 'exceeding_endurance' and \
                self.endurance > self.config.max_endurance_task:
            return True
        return False

    def get_step_reward(self):
        # TODO(haowen) Use the decrease of validation loss as step reward
        if self.improve_baseline is None:
            # ----First step, nothing to compare with.----
            improve = 0.1
        else:
            improve = (self.previous_valid_loss[-2] -
                       self.previous_valid_loss[-1])

        # TODO(haowen) Try to use sqrt function instead of sign function
        # ----With baseline.----
        if self.improve_baseline is None:
            self.improve_baseline = improve
        decay = self.config.reward_baseline_decay
        self.improve_baseline = decay * self.improve_baseline\
            + (1 - decay) * improve

        value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5))
        #value = abs(improve) / (abs(self.improve_baseline) + 1e-5)
        value = min(value, self.config.reward_max_value)
        return math.copysign(value, improve) * self.config.reward_step_ctrl

    def get_final_reward(self):
        '''
        Return:
            reward: real reward
            adv: advantage, i.e. the reward minus the moving baseline,
                clipped to [-reward_max_value, reward_max_value]
        '''
        assert self.best_performance > -1e10 + 1
        acc = max(self.best_performance, 1 / self.config.dim_output_task)
        reward = -self.config.reward_c / acc

        # Calculate baseline
        if self.reward_baseline is None:
            self.reward_baseline = reward
        else:
            decay = self.config.reward_baseline_decay
            self.reward_baseline = decay * self.reward_baseline\
                + (1 - decay) * reward

        # Calculate advantage
        adv = reward - self.reward_baseline
        adv = min(adv, self.config.reward_max_value)
        adv = max(adv, -self.config.reward_max_value)
        return reward, adv

    def get_state(self):
        abs_diff = []
        rel_diff = []
        if self.improve_baseline is None:
            ib = 1
        else:
            ib = self.improve_baseline

        # relative difference between valid_loss and train_loss
        v = self.previous_valid_loss[-1]
        t = self.previous_train_loss[-1]
        abs_diff = v - t
        if t > 1e-6:
            rel_diff = (v - t) / t
        else:
            rel_diff = 0
        state0 = rel_diff

        # normalized baseline improvement
        state1 = 1 + math.log(abs(ib) + 1e-4) / 10

        # normalized mse ce_loss and l1_loss:
        ce_loss = self.previous_ce_loss[-1]
        state2 = ce_loss

        l1_loss = self.previous_l1_loss[-1]
        state3 = l1_loss

        # train_acc, valid_acc and their difference
        if self.previous_valid_acc[-1] == 0:
            state4 = 0
        else:
            state4 = (self.previous_train_acc[-1] - self.previous_valid_acc[-1]) /\
                self.previous_valid_acc[-1]

        # difference between magnitude of ce gradient and magnitude of l1
        # gradient
        if self.mag_l1_grad == 0:
            state5 = 0
        else:
            state5 = (self.mag_ce_grad - self.mag_l1_grad) / self.mag_l1_grad

        state = [state0, state1, state2, state3, state4, state5]
        return np.array(state, dtype='f')
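
get_step_reward above turns the per-step decrease in validation loss into a bounded reward: the improvement is divided by a decayed moving baseline, passed through a signed square root, clipped, and scaled. The standalone sketch below reproduces that shaping; the constants are placeholders standing in for the config values reward_max_value and reward_step_ctrl.

import math

def shaped_step_reward(improve, improve_baseline, reward_max_value=5.0,
                       reward_step_ctrl=0.1, eps=1e-5):
    # Signed square-root shaping of the loss improvement, relative to a running baseline.
    value = math.sqrt(abs(improve) / (abs(improve_baseline) + eps))
    value = min(value, reward_max_value)
    return math.copysign(value, improve) * reward_step_ctrl

# An improvement about equal in size to its baseline yields roughly +/- reward_step_ctrl.
print(shaped_step_reward(improve=0.02, improve_baseline=0.02))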
Example #8
class Reg(Basic_model):
    '''
    Public variables (all task models should have these public variables):
        self.extra_info
        self.checkpoint_dir
        self.best_performance
        self.test_dataset
    '''
    def __init__(self, config, exp_name='new_exp'):
        super(Reg, self).__init__(config, exp_name)
        self.reset()
        self._load_datasets()
        self._build_placeholder()
        self._build_graph()
        self.reward_baseline = None
        self.improve_baseline = None

    def _load_datasets(self):
        config = self.config
        train_ctrl_data_file = os.path.join(config.data_dir,
                                            config.train_ctrl_data_file)
        train_task_data_file = os.path.join(config.data_dir,
                                            config.train_task_data_file)
        valid_ctrl_data_file = os.path.join(config.data_dir,
                                            config.valid_ctrl_data_file)
        valid_task_data_file = os.path.join(config.data_dir,
                                            config.valid_task_data_file)
        test_data_file = os.path.join(config.data_dir, config.test_data_file)
        self.train_ctrl_dataset = Dataset()
        self.train_ctrl_dataset.load_npy(train_ctrl_data_file)
        self.valid_ctrl_dataset = Dataset()
        self.valid_ctrl_dataset.load_npy(valid_ctrl_data_file)
        self.train_task_dataset = Dataset()
        self.train_task_dataset.load_npy(train_task_data_file)
        self.valid_task_dataset = Dataset()
        self.valid_task_dataset.load_npy(valid_task_data_file)
        self.test_dataset = Dataset()
        self.test_dataset.load_npy(test_data_file)

    def reset(self):
        """ Reset the model """
        # TODO(haowen) The way to carry step number information should be
        # reconsidered
        self.step_number = [0]
        self.previous_mse_loss = [0] * self.config.num_pre_loss
        self.previous_l1_loss = [0] * self.config.num_pre_loss
        self.previous_valid_loss = [0] * self.config.num_pre_loss
        self.previous_train_loss = [0] * self.config.num_pre_loss
        self.mag_mse_grad = 0
        self.mag_l1_grad = 0

        # to control when to terminate the episode
        self.endurance = 0
        # The bigger, the better. In this case, performance is negative loss.
        # Naming it as performance in order to be compatible with other tasks.
        self.best_performance = -1e10
        self.collapse = False
        self.improve_baseline = None

    def _build_placeholder(self):
        x_size = self.config.dim_input_task
        with self.graph.as_default():
            self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.float32)

    def _build_graph(self):
        y_size = self.config.dim_output_task
        lr = self.config.lr_task

        with self.graph.as_default():
            # ----quadratic equation----
            #  ---first order---
            x_size = self.config.dim_input_task
            initial = tf.random_normal(shape=[x_size, 1], stddev=0.1, seed=1)
            w1 = tf.Variable(initial)
            sum1 = tf.matmul(self.x_plh, w1)

            #  ---second order---
            initial = tf.random_normal(shape=[x_size, x_size],
                                       stddev=0.01,
                                       seed=1)
            w2 = tf.Variable(initial)
            xx = tf.matmul(tf.reshape(self.x_plh, [-1, x_size, 1]),
                           tf.reshape(self.x_plh, [-1, 1, x_size]))
            sum2 = tf.matmul(tf.reshape(xx, [-1, x_size * x_size]),
                             tf.reshape(w2, [x_size * x_size, 1]))

            # NOTE(Haowen): Dividing by 10 is important here to guarantee
            # convergence. It is some kind of cheating but we think it is fine
            # as a demo task.
            self.pred = sum1 + sum2 / 10

            # Only for debug
            self.w1 = w1
            self.w2 = w2

            # define loss
            self.loss_mse = tf.reduce_mean(
                tf.square(tf.squeeze(self.pred) - self.y_plh))

            # NOTE(Haowen): Somehow the l1 regularizers provided by tf
            # provide better performance than the self-designed regularizers
            # shown in the following 6 lines.

            #self.loss_l1 = self.config.lambda_task * (
            #    tf.reduce_sum(tf.reduce_sum(tf.abs(w2)))\
            #    + tf.reduce_sum(tf.reduce_sum(tf.abs(w1))))

            tvars = tf.trainable_variables()
            l1_regularizer = tf.contrib.layers.l1_regularizer(
                scale=self.config.lambda_task, scope=None)
            self.loss_l1 = tf.contrib.layers.apply_regularization(
                l1_regularizer, tvars)
            self.loss_total = self.loss_mse + self.loss_l1

            # ----Define update operation.----
            optimizer = tf.train.AdamOptimizer(lr)
            mse_gvs = optimizer.compute_gradients(self.loss_mse, tvars)
            l1_gvs = optimizer.compute_gradients(self.loss_l1, tvars)
            total_gvs = optimizer.compute_gradients(self.loss_total, tvars)
            self.update_mse = optimizer.apply_gradients(mse_gvs)
            self.update_l1 = optimizer.apply_gradients(l1_gvs)
            self.update_total = optimizer.apply_gradients(total_gvs)
            self.update = [self.update_mse, self.update_l1]

            self.init = tf.global_variables_initializer()
            self.saver = tf.train.Saver()
            self.mse_grad = [grad for grad, _ in mse_gvs]
            self.l1_grad = [grad for grad, _ in l1_gvs]

    def valid(self, dataset=None):
        if not dataset:
            if self.config.args.task_mode == 'train':
                dataset = self.valid_ctrl_dataset
            elif self.config.args.task_mode == 'test':
                dataset = self.valid_task_dataset
            elif self.config.args.task_mode == 'baseline':
                dataset = self.valid_task_dataset
            else:
                logger.exception('Unexpected task_mode: {}'.\
                                format(self.config.args.task_mode))
        data = dataset.next_batch(dataset.num_examples)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_mse, self.pred, self.y_plh]
        [loss_mse, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict)
        return loss_mse, pred, gdth

    def response(self, action):
        """ Given an action, return the new state, reward and whether dead

        Args:
            action: one hot encoding of actions

        Returns:
            state: shape = [dim_input_ctrl]
            reward: shape = [1]
            dead: boolean
        """
        if self.config.args.task_mode == 'train':
            dataset = self.train_ctrl_dataset
        elif self.config.args.task_mode == 'test':
            dataset = self.train_task_dataset
        elif self.config.args.task_mode == 'baseline':
            dataset = self.train_task_dataset
        else:
            logger.exception('Unexpected mode: {}'.\
                             format(self.config.args.task_mode))
        data = dataset.next_batch(self.config.batch_size)

        sess = self.sess
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}

        a = np.argmax(np.array(action))
        sess.run(self.update[a], feed_dict=feed_dict)

        fetch = [self.loss_mse, self.loss_l1, self.mse_grad, self.l1_grad]
        loss_mse, loss_l1, mse_grad, l1_grad = sess.run(fetch,
                                                        feed_dict=feed_dict)
        valid_loss, _, _ = self.valid()
        train_loss = loss_mse

        # ----Update state.----
        self.previous_mse_loss = self.previous_mse_loss[1:] + [
            loss_mse.tolist()
        ]
        self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()]
        self.step_number[0] += 1
        self.previous_valid_loss = self.previous_valid_loss[1:]\
            + [valid_loss.tolist()]
        self.previous_train_loss = self.previous_train_loss[1:]\
            + [train_loss.tolist()]
        self.mag_mse_grad = self.get_grads_magnitude(mse_grad)
        self.mag_l1_grad = self.get_grads_magnitude(l1_grad)

        # Public variable
        self.extra_info = {
            'valid_loss': self.previous_valid_loss[-1],
            'train_loss': self.previous_train_loss[-1]
        }

        reward = self.get_step_reward()
        # ----Early stop and record best result.----
        dead = self.check_terminate()
        state = self.get_state()
        return state, reward, dead

    def check_terminate(self):
        # TODO(haowen)
        # An episode terminates on two conditions:
        # 1) Convergence: valid loss doesn't improve within `endurance` steps
        # 2) Collapse: the action space collapses to one action (not implemented yet)

        # Return True if the training should stop, otherwise return False
        step = self.step_number[0]
        if step % self.config.valid_frequency_task == 0:
            self.endurance += 1
            loss, _, _ = self.valid()
            if -loss > self.best_performance:
                self.best_step = self.step_number[0]
                self.best_performance = -loss
                self.endurance = 0
                if not self.config.args.task_mode == 'train':
                    self.save_model(step, mute=True)

        if step > self.config.max_training_step:
            return True
        if self.config.stop_strategy_task == 'exceeding_endurance' and \
                self.endurance > self.config.max_endurance_task:
            return True
        return False

    def get_step_reward(self):
        # TODO(haowen) Use the decrease of validation loss as step reward
        if self.improve_baseline is None:
            # ----First step, nothing to compare with.----
            improve = 0.1
        else:
            improve = (self.previous_valid_loss[-2] -
                       self.previous_valid_loss[-1])

        # TODO(haowen) Try to use sqrt function instead of sign function
        if self.improve_baseline is None:
            self.improve_baseline = improve
        decay = self.config.reward_baseline_decay
        self.improve_baseline = decay * self.improve_baseline\
            + (1 - decay) * improve

        value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5))
        #value = abs(improve) / (abs(self.improve_baseline) + 1e-5)
        value = min(value, self.config.reward_max_value)
        return math.copysign(value, improve) * self.config.reward_step_ctrl

    def get_final_reward(self):
        '''
        Return:
            reward: real reward
            adv: advantage, i.e. the reward minus the moving baseline,
                clipped to [-reward_max_value, reward_max_value]
        '''
        assert self.best_performance > -1e10 + 1
        loss_mse = -self.best_performance
        reward = self.config.reward_c / loss_mse

        # Calculate baseline
        if self.reward_baseline is None:
            self.reward_baseline = reward
        else:
            decay = self.config.reward_baseline_decay
            self.reward_baseline = decay * self.reward_baseline\
                + (1 - decay) * reward

        # Calculate advantage
        adv = reward - self.reward_baseline
        adv = min(adv, self.config.reward_max_value)
        adv = max(adv, -self.config.reward_max_value)
        return reward, adv

    def get_state(self):
        abs_diff = []
        rel_diff = []
        if self.improve_baseline is None:
            ib = 1
        else:
            ib = self.improve_baseline

        # relative difference between valid_loss and train_loss:
        # we only consider the latest step
        v = self.previous_valid_loss[-1]
        t = self.previous_train_loss[-1]
        abs_diff = v - t
        if t > 1e-6:
            rel_diff = (v - t) / t
        else:
            rel_diff = 0
        state0 = rel_diff

        # normalized baseline improvement
        state1 = 1 + math.log(abs(ib) + 1e-5) / 12

        # normalized mse loss
        mse_loss = self.previous_mse_loss[-1]
        state2 = min(1, mse_loss / 20) - 0.5

        # normalized l1 loss
        l1_loss = self.previous_l1_loss[-1]
        state3 = l1_loss - 1

        # difference between magnitude of mse gradient and magnitude of l1
        # gradient
        state4 = self.mag_mse_grad - self.mag_l1_grad

        state = [state0, state1, state2, state3, state4]

        return np.array(state, dtype='f')
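
Both Cls (Example #7) and Reg (Example #8) compute the episode-level advantage the same way: a scalar reward derived from the best validation performance, an exponentially decayed running baseline, and the clipped difference between the two. A minimal sketch of that pattern, with placeholder constants standing in for reward_baseline_decay and reward_max_value:

class EpisodeBaseline:
    """Illustrative sketch of the moving-average baseline behind get_final_reward."""

    def __init__(self, decay=0.8, clip=5.0):
        self.decay = decay
        self.clip = clip
        self.reward_baseline = None

    def advantage(self, reward):
        # First episode: the baseline is initialized to the reward, so the advantage is 0.
        if self.reward_baseline is None:
            self.reward_baseline = reward
        else:
            self.reward_baseline = (self.decay * self.reward_baseline
                                    + (1 - self.decay) * reward)
        adv = reward - self.reward_baseline
        return max(-self.clip, min(adv, self.clip))

baseline = EpisodeBaseline()
print(baseline.advantage(-2.0), baseline.advantage(-1.5))  # 0.0 first, then a positive advantage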
Example #9
def main():
    config_path = os.path.join(root_path, 'config/skirt_length.cfg')
    config = Parser(config_path)
    num_classes = 6
    dataset = Dataset(config, tf.estimator.ModeKeys.TRAIN)
    dataset_valid = Dataset(config, tf.estimator.ModeKeys.EVAL)
    dataset_test = Dataset(config, tf.estimator.ModeKeys.PREDICT)

    tf.reset_default_graph()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95,
                                allow_growth=True)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options,
                                                       allow_soft_placement=True))
    model = Model(config, num_classes)
    model.load_pretrain(sess)

    saver = tf.train.Saver(max_to_keep=5)

    best_acc = 0
    best_loss = 100
    test_acc = 0
    test_loss = 0
    valid_acc = 0
    valid_loss = 0
    train_acc = 0
    train_loss = 0

    timedelay = 0
    steps = 0
    batch_size = config.batch_size
    start_time = time.time()

    valid_samples = dataset_valid.next_batch(batch_size)

    while (timedelay < config.timedelay_num) and (steps < config.max_step):
        samples = dataset.next_batch(batch_size)
        steps += 1
        model.train_one_step(sess, samples)
        if steps % config.summary_steps == 0:
            train_loss, train_results = model.test_by_batch(sess, samples)
            train_acc = evaluate_metrics(train_results)

            valid_loss, valid_results = model.test_by_batch(sess, valid_samples)
            valid_acc = evaluate_metrics(valid_results)

            if best_loss > valid_loss or best_acc < valid_acc:
                timedelay = 0
                saver.save(sess, os.path.join(config.exp_dir, 'E01/fashionAI'),
                           global_step=steps)
            else:
                timedelay += 1

            if best_acc < valid_acc:
                best_acc = valid_acc
            if best_loss > valid_loss:
                best_loss = valid_loss

            sys.stdout.write('\nBatches: %d' % steps)
            sys.stdout.write('\nBatch Time: %4fs' % (1.0 * (time.time() - start_time) / config.summary_steps))

            sys.stdout.write('\nTrain acc: %.6f' % train_acc)
            sys.stdout.write('\tTrain Loss: %.6f' % train_loss)
            sys.stdout.write('\nValid acc: %.6f' % valid_acc)
            sys.stdout.write('\tValid Loss: %.6f' % valid_loss)
            sys.stdout.write('\nBest acc: %.6f' % best_acc)
            sys.stdout.write('\tBest Loss: %.6f' % best_loss)
            sys.stdout.write('\n\n')

            #print('\nBatches: %d' % steps, end='')
            #print('\nBatch Time: %4fs' % (1.0 * (time.time() - start_time) / config.summary_steps), end='')

            #print('\nTrain acc: %.6f' % train_acc, end='')
            #print('\tTrain Loss: %.6f' % train_loss, end='')
            #print('\nValid acc: %.6f' % valid_acc, end='')
            #print('\tValid Loss: %.6f' % valid_loss, end='')
            #print('\nBest acc: %.6f' % best_acc, end='')
            #print('\tBest Loss: %.6f' % best_loss, end='')
            #print('\n\n')
            start_time = time.time()

    print('\nModel saved at {}'.format(config.exp_dir))
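
The loop above stops either at config.max_step or once validation has gone config.timedelay_num evaluation rounds without improving on either metric. Stripped of the batching and TensorFlow details, the same patience pattern looks roughly like this (evaluate is a stand-in callback, one call per evaluation round):

def train_with_patience(evaluate, max_steps, patience):
    """Illustrative sketch of the timedelay-style early stopping used above.
    evaluate(step) must return (valid_loss, valid_acc)."""
    best_loss, best_acc = float('inf'), 0.0
    stale, step = 0, 0
    while stale < patience and step < max_steps:
        step += 1
        valid_loss, valid_acc = evaluate(step)
        if valid_loss < best_loss or valid_acc > best_acc:
            stale = 0  # an improvement on either metric resets the counter
        else:
            stale += 1
        best_loss = min(best_loss, valid_loss)
        best_acc = max(best_acc, valid_acc)
    return step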
Example #10
    def _load_datasets(self):
        config = self.config
        self.train_datasets = []
        self.valid_datasets = []
        self.test_datasets = []
        for task_num, task in enumerate(config.task_names):
            train_datasets = Dataset()
            valid_datasets = Dataset()
            test_datasets = Dataset()
            train_datasets.load_json(config.train_data_files[task_num])
            valid_datasets.load_json(config.valid_data_files[task_num])
            test_datasets.load_json(config.test_data_files[task_num])

            # Only use 70% of each dataset to speed up training.
            train_datasets.resize(int(train_datasets._num_examples * 0.7), shuffle=True)
            valid_datasets.resize(int(valid_datasets._num_examples * 0.7), shuffle=True)
            test_datasets.resize(int(test_datasets._num_examples * 0.7), shuffle=True)

            self.train_datasets.append(train_datasets)
            self.valid_datasets.append(valid_datasets)
            self.test_datasets.append(test_datasets)
Example #11
File: reg.py  Project: safpla/autoLoss
class Reg(Basic_model):
    def __init__(self, config, exp_name='new_exp', loss_mode='1'):
        self.config = config
        self.graph = tf.Graph()
        gpu_options = tf.GPUOptions(allow_growth=True)
        configProto = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
        # ----Loss_mode is only for DEBUG usage.----
        #   0: only mse, 1: mse & l1
        self.loss_mode = loss_mode
        self.exp_name = exp_name
        train_data_file = config.train_data_file
        train_stud_data_file = config.train_stud_data_file
        valid_data_file = config.valid_data_file
        self.train_dataset = Dataset()
        self.train_dataset.load_npy(train_data_file)
        self.valid_dataset = Dataset()
        self.valid_dataset.load_npy(valid_data_file)
        self.train_stud_dataset = Dataset()
        self.train_stud_dataset.load_npy(train_stud_data_file)
        self.reset()
        self._build_placeholder()
        self._build_graph()
        self.reward_baseline = None
        self.improve_baseline = None

    def reset(self):
        """ Reset the model """
        # TODO(haowen) The way to carry step number information should be
        # reconsidered
        self.step_number = [0]
        self.previous_mse_loss = [0] * self.config.num_pre_loss
        self.previous_l1_loss = [0] * self.config.num_pre_loss
        self.previous_action = [0, 0]
        self.previous_valid_loss = [0] * self.config.num_pre_loss
        self.previous_train_loss = [0] * self.config.num_pre_loss
        self.mag_mse_grad = 0
        self.mag_l1_grad = 0

        # to control when to terminate the episode
        self.endurance = 0
        self.best_loss = 1e10
        self.improve_baseline = None

    def _build_placeholder(self):
        x_size = self.config.dim_input_stud
        with self.graph.as_default():
            self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.float32)

    def _build_graph(self):
        h_size = self.config.dim_hidden_stud
        y_size = self.config.dim_output_stud
        lr = self.config.lr_stud

        with self.graph.as_default():
            # ----quadratic equation----
            #  ---first order---
            x_size = self.config.dim_input_stud
            initial = tf.random_normal(shape=[x_size, 1], stddev=0.1, seed=1)
            w1 = tf.Variable(initial)
            sum1 = tf.matmul(self.x_plh, w1)

            #  ---second order---
            initial = tf.random_normal(shape=[x_size, x_size],
                                       stddev=0.01,
                                       seed=1)
            w2 = tf.Variable(initial)
            xx = tf.matmul(tf.reshape(self.x_plh, [-1, x_size, 1]),
                           tf.reshape(self.x_plh, [-1, 1, x_size]))
            sum2 = tf.matmul(tf.reshape(xx, [-1, x_size * x_size]),
                             tf.reshape(w2, [x_size * x_size, 1]))
            # NOTE(Haowen): Dividing by 10 is important here to guarantee
            # convergence.
            self.pred = sum1 + sum2 / 10
            self.w1 = w1
            self.w2 = w2

            # define loss
            self.loss_mse = tf.reduce_mean(
                tf.square(tf.squeeze(self.pred) - self.y_plh))

            # NOTE(Haowen): Somehow the l1 regularizers provided by tf
            # provide better performance than the self-designed regularizers
            # shown in the following 6 lines.

            #self.loss_l1 = self.config.lambda1_stud * (
            #    tf.reduce_sum(tf.reduce_sum(tf.abs(w2)))\
            #    + tf.reduce_sum(tf.reduce_sum(tf.abs(w1))))

            tvars = tf.trainable_variables()
            l1_regularizer = tf.contrib.layers.l1_regularizer(
                scale=self.config.lambda1_stud, scope=None)
            self.loss_l1 = tf.contrib.layers.apply_regularization(
                l1_regularizer, tvars)
            if self.loss_mode == '0':
                self.loss_total = self.loss_mse
                print('mse loss')
            elif self.loss_mode == '1':
                self.loss_total = self.loss_mse + self.loss_l1
                print('mse loss and l1 loss')
                print('lambda1:', self.config.lambda1_stud)
            else:
                raise NotImplementedError

            # ----Define update operation.----
            optimizer = tf.train.AdamOptimizer(lr)
            mse_gvs = optimizer.compute_gradients(self.loss_mse, tvars)
            l1_gvs = optimizer.compute_gradients(self.loss_l1, tvars)
            total_gvs = optimizer.compute_gradients(self.loss_total, tvars)
            self.update_mse = optimizer.apply_gradients(mse_gvs)
            self.update_l1 = optimizer.apply_gradients(l1_gvs)
            self.update_total = optimizer.apply_gradients(total_gvs)
            self.init = tf.global_variables_initializer()
            self.mse_grad = [grad for grad, _ in mse_gvs]
            self.l1_grad = [grad for grad, _ in l1_gvs]

    def train(self):
        """ Optimize mse loss, l1 loss at the same time """
        data = self.train_dataset.next_batch(self.config.batch_size)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        loss, _ = self.sess.run([self.loss_total, self.update_total],
                                feed_dict=feed_dict)
        return loss

    def valid(self, dataset=None):
        """ test on validation set """
        if not dataset:
            dataset = self.valid_dataset
        data = dataset.next_batch(dataset.num_examples)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_mse, self.pred, self.y_plh]
        [loss_mse, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict)
        loss_mse_np = np.mean(np.square(pred - gdth))
        return loss_mse, pred, gdth

    def response(self, action, lr, mode='TRAIN'):
        """ Given an action, return the new state, reward and whether dead

        Args:
            action: one hot encoding of actions
            lr: learning rate to use (passed in when lr needs to decay)

        Returns:
            state: shape = [dim_state_rl]
            reward: shape = [1]
            dead: boolean
        """
        if mode == 'TRAIN':
            dataset = self.train_dataset
        else:
            dataset = self.train_stud_dataset

        data = dataset.next_batch(self.config.batch_size)
        sess = self.sess
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}

        if action[0] == 1:
            # ----Update mse loss.----
            sess.run(self.update_mse, feed_dict=feed_dict)
        elif action[1] == 1:
            # ----Update l1 loss.----
            sess.run(self.update_l1, feed_dict=feed_dict)
        fetch = [self.loss_mse, self.loss_l1, self.mse_grad, self.l1_grad]
        loss_mse, loss_l1, mse_grad, l1_grad = sess.run(fetch,
                                                        feed_dict=feed_dict)
        valid_loss, _, _ = self.valid()
        train_loss, _, _ = self.valid(dataset=dataset)

        # ----Update state.----
        self.previous_mse_loss = self.previous_mse_loss[1:] + [
            loss_mse.tolist()
        ]
        self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()]
        self.previous_action = action.tolist()
        self.step_number[0] += 1
        self.previous_valid_loss = self.previous_valid_loss[1:]\
            + [valid_loss.tolist()]
        self.previous_train_loss = self.previous_train_loss[1:]\
            + [train_loss.tolist()]
        self.mag_mse_grad = self.get_grads_magnitude(mse_grad)
        self.mag_l1_grad = self.get_grads_magnitude(l1_grad)

        reward = self.get_step_reward()
        # ----Early stop and record best result.----
        dead = self.check_terminate()
        state = self.get_state()
        return state, reward, dead

    def check_terminate(self):
        # TODO(haowen)
        # An episode terminates on two conditions:
        # 1) Convergence: valid loss doesn't improve within `endurance` steps
        # 2) Collapse: the action space collapses to one action (not implemented yet)
        step = self.step_number[0]
        if step % self.config.valid_frequency_stud == 0:
            self.endurance += 1
            loss, _, _ = self.valid()
            if loss < self.best_loss:
                self.best_step = self.step_number[0]
                self.best_loss = loss
                self.endurance = 0

        if step > self.config.max_training_step:
            return True
        if self.config.stop_strategy_stud == 'exceeding_endurance' and \
                self.endurance > self.config.max_endurance_stud:
            return True
        elif self.config.stop_strategy_stud == 'prescribed_conv_target':
            if self.best_loss < self.config.conv_target_stud:
                return True
        return False

    def get_step_reward(self):
        # TODO(haowen) Use the decrease of validation loss as step reward
        if self.improve_baseline is None:
            # ----First step, nothing to compare with.----
            improve = 0.1
        else:
            improve = (self.previous_valid_loss[-2] -
                       self.previous_valid_loss[-1])

        # TODO(haowen) Try to use sqrt function instead of sign function
        if self.improve_baseline is None:
            self.improve_baseline = improve
        decay = self.config.reward_baseline_decay
        self.improve_baseline = decay * self.improve_baseline\
            + (1 - decay) * improve

        #TODO(haowen) Remove nonlinearity
        value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5))
        #value = abs(improve) / (abs(self.improve_baseline) + 1e-5)
        value = min(value, self.config.reward_max_value)
        return math.copysign(value, improve) * self.config.reward_step_rl

    def get_final_reward(self):
        assert self.best_loss < 1e10 - 1
        loss_mse = self.best_loss
        reward = self.config.reward_c / loss_mse

        if self.reward_baseline is None:
            self.reward_baseline = reward
        logger.info('reward: {}'.format(reward))
        logger.info('reward_baseline: {}'.format(self.reward_baseline))
        decay = self.config.reward_baseline_decay
        adv = reward - self.reward_baseline
        adv = min(adv, self.config.reward_max_value)
        adv = max(adv, -self.config.reward_max_value)
        # TODO(haowen) Try to use maximum instead of shift average as baseline
        # Result: doesn't seem to help too much
        # ----Shift average----
        self.reward_baseline = decay * self.reward_baseline\
            + (1 - decay) * reward
        # ----Maximum----
        #if self.reward_baseline < reward:
        #    self.reward_baseline = reward
        return reward, adv

    def get_state(self):
        abs_diff = []
        rel_diff = []
        if self.improve_baseline is None:
            ib = 1
        else:
            ib = self.improve_baseline

        for v, t in zip(self.previous_valid_loss, self.previous_train_loss):
            abs_diff.append(v - t)
            if t > 1e-6:
                rel_diff.append((v - t) / t)
            else:
                rel_diff.append(0)

        state = ([rel_diff[-1] * 10] + _normalize1([abs(ib)]) +
                 _normalize2(self.previous_mse_loss[-1:]) +
                 _normalize3(self.previous_l1_loss[-1:]) +
                 [self.mag_mse_grad - self.mag_l1_grad])
        return np.array(state, dtype='f')
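
get_state in Examples #11 and #12 calls module-level helpers _normalize1, _normalize2 and _normalize3 that the snippets do not show. Example #8 inlines what appear to be the same transformations (state1 through state3 there), so a plausible reconstruction is sketched below; treat it as a hypothetical stand-in rather than the project's actual helpers.

import math

# Hypothetical reconstructions of the unshown _normalize helpers, modelled on the
# inline normalizations in Example #8; the real implementations may differ.
def _normalize1(values):
    # Log-compress the magnitude of the improvement baseline.
    return [1 + math.log(abs(v) + 1e-5) / 12 for v in values]

def _normalize2(values):
    # Squash an MSE-scale loss into roughly [-0.5, 0.5].
    return [min(1.0, v / 20.0) - 0.5 for v in values]

def _normalize3(values):
    # Shift an l1-scale loss so that a typical value sits near zero.
    return [v - 1.0 for v in values]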
Example #12
class Reg(Basic_model):
    def __init__(self, config, sess, exp_name='new_exp', loss_mode='1'):
        super(Reg, self).__init__(config, sess, exp_name)
        # ----Loss_mode is only for DEBUG usage.----
        #   0: only mse, 1: mse & l1
        self.update_steps = 0
        self.loss_mode = loss_mode
        self.exp_name = exp_name
        self._load_dataset()
        with tf.variable_scope(exp_name):
            self._build_placeholder()
            self._build_graph()
        self.reset()
        self.reward_baseline = None
        self.improve_baseline = None

    def reset(self):
        """ Reset the model """
        # TODO(haowen) The way to carry step number information should be
        # reconsidered
        self.update_steps = 0
        self.previous_action = [0, 0]
        num_pre = self.config.task.num_pre_loss
        self.previous_mse_loss = deque(maxlen=num_pre)
        self.previous_l1_loss = deque(maxlen=num_pre)
        self.previous_valid_loss = deque(maxlen=num_pre)
        self.previous_train_loss = deque(maxlen=num_pre)
        self.improve_history = deque(maxlen=20)

        # to control when to terminate the episode
        self.endurance = 0
        self.best_loss = 1e10
        self.improve_baseline = None

    def _load_dataset(self):
        conf = self.config.data
        train_c_data_file = conf.train_c_data_file
        valid_c_data_file = conf.valid_c_data_file
        train_t_data_file = conf.train_t_data_file
        valid_t_data_file = conf.valid_t_data_file
        test_data_file = conf.test_data_file
        self.train_c_dataset = Dataset()
        self.train_c_dataset.load_npy(train_c_data_file)
        self.valid_c_dataset = Dataset()
        self.valid_c_dataset.load_npy(valid_c_data_file)
        self.train_t_dataset = Dataset()
        self.train_t_dataset.load_npy(train_t_data_file)
        self.valid_t_dataset = Dataset()
        self.valid_t_dataset.load_npy(valid_t_data_file)
        self.test_dataset = Dataset()
        self.test_dataset.load_npy(test_data_file)

    def _build_placeholder(self):
        x_size = self.config.task.dim_input
        with tf.variable_scope('placeholder'):
            self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.float32)

    def _build_graph(self):
        config = self.config.task
        h_size = config.dim_hidden
        y_size = config.dim_output
        lr = config.lr

        with tf.variable_scope('quadratic'):
            # ----quadratic equation----
            #  ---first order---
            x_size = config.dim_input
            initial = tf.random_normal(shape=[x_size, 1], stddev=0.1, seed=1)
            w1 = tf.Variable(initial)
            sum1 = tf.matmul(self.x_plh, w1)

            #  ---second order---
            initial = tf.random_normal(shape=[x_size, x_size],
                                       stddev=0.01,
                                       seed=1)
            w2 = tf.Variable(initial)
            xx = tf.matmul(tf.reshape(self.x_plh, [-1, x_size, 1]),
                           tf.reshape(self.x_plh, [-1, 1, x_size]))
            sum2 = tf.matmul(tf.reshape(xx, [-1, x_size * x_size]),
                             tf.reshape(w2, [x_size * x_size, 1]))
            # NOTE(Haowen): Dividing by 10 is important here to guarantee
            # convergence.
            self.pred = sum1 + sum2 / 10
            self.w1 = w1
            self.w2 = w2

            # define loss
            self.loss_mse = tf.reduce_mean(
                tf.square(tf.squeeze(self.pred) - self.y_plh))

            tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope=self.exp_name)
            l1_regularizer = tf.contrib.layers.l1_regularizer(
                scale=config.lambda1, scope=None)
            self.loss_l1 = tf.contrib.layers.apply_regularization(
                l1_regularizer, tvars)
            l2_regularizer = tf.contrib.layers.l2_regularizer(
                scale=config.lambda2, scope=None)
            self.loss_l2 = tf.contrib.layers.apply_regularization(
                l2_regularizer, tvars)
            if self.loss_mode == '0':
                self.loss_total = self.loss_mse
                print('mse loss')
            elif self.loss_mode == '1':
                self.loss_total = self.loss_mse + self.loss_l1
                print('mse loss and l1 loss')
                print('lambda1:', config.lambda1)
            elif self.loss_mode == '2':
                self.loss_total = self.loss_mse + self.loss_l1 + self.loss_l2
                print('mse loss, l1 loss and l2 loss')
                print('lambda1: {}, lambda2: {}'.format(
                    config.lambda1, config.lambda2))
            else:
                raise NotImplementedError

            # ----Define update operation.----
            self.update_mse = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_mse)
            self.update_l1 = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_l1)
            self.update_l2 = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_l2)
            self.update_total = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_total)
            self.tvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope=self.exp_name)
            self.init = tf.variables_initializer(self.tvars)

    def train(self):
        """ Run one optimization step on the total loss (self.update_total is the op). """
        data = self.train_t_dataset.next_batch(self.config.task.batch_size)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        loss, _ = self.sess.run([self.loss_total, self.update_total],
                                feed_dict=feed_dict)
        return loss

    def valid(self, dataset=None):
        """ test on given dataset """
        if not dataset:
            dataset = self.valid_t_dataset
        data = dataset.next_batch(dataset.num_examples)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_mse, self.pred, self.y_plh]
        loss_mse, pred, gdth = self.sess.run(fetch, feed_dict=feed_dict)
        return loss_mse, pred, gdth

    def response(self, action, mode='TRAIN'):
        """ Given an action, return the new state, reward and whether dead

        Args:
            action: one hot encoding of actions

        Returns:
            state: shape = [dim_state_rl]
            reward: shape = [1]
            dead: boolean
        """
        if mode == 'TRAIN':
            dataset = self.train_c_dataset
            dataset_v = self.valid_c_dataset
        else:
            dataset = self.train_t_dataset
            dataset_v = self.valid_t_dataset

        data = dataset.next_batch(self.config.task.batch_size)
        sess = self.sess
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_mse, self.loss_l1]

        if action == 0:
            # ----Update mse loss.----
            sess.run(self.update_mse, feed_dict=feed_dict)
        elif action == 1:
            # ----Update l1 loss.----
            sess.run(self.update_l1, feed_dict=feed_dict)
        elif action == 2:
            # ----Update l2 loss.----
            sess.run(self.update_l2, feed_dict=feed_dict)

        loss_mse, loss_l1 = sess.run(fetch, feed_dict=feed_dict)
        valid_loss, _, _ = self.valid(dataset=dataset_v)
        train_loss, _, _ = self.valid(dataset=dataset)

        # ----Update state.----
        # The loss histories are deques with maxlen set in reset(), so a plain
        # append drops the oldest entry automatically.
        self.previous_mse_loss.append(loss_mse.tolist())
        self.previous_l1_loss.append(loss_l1.tolist())
        self.previous_action = action.tolist()
        self.update_steps += 1
        self.previous_valid_loss.append(valid_loss.tolist())
        self.previous_train_loss.append(train_loss.tolist())

        reward = self.get_step_reward()
        # ----Early stop and record best result.----
        dead = self.check_terminate()
        state = self.get_state()
        return state, reward, dead

    def check_terminate(self):
        # TODO(haowen)
        # An episode terminates on two conditions:
        # 1) Convergence: valid loss doesn't improve within `endurance` steps
        # 2) Collapse: the action space collapses to one action (not implemented yet)
        step = self.update_steps
        if step % self.config.task.valid_frequency == 0:
            self.endurance += 1
            loss, _, _ = self.valid()
            if loss < self.best_loss:
                self.best_step = self.update_steps
                self.best_loss = loss
                self.endurance = 0
            if self.endurance > self.config.task.max_endurance:
                return True
        return False

    def get_step_reward(self):
        # TODO(haowen) Use the decrease of validation loss as step reward
        if self.improve_baseline is None:
            # ----First step, nothing to compare with.----
            improve = 0.1
        else:
            improve = (self.previous_valid_loss[-2] -
                       self.previous_valid_loss[-1])

        self.improve_history.append(improve)
        self.improve_baseline = mean(self.improve_history)

        #TODO(haowen) Remove nonlinearity
        value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5))
        #value = abs(improve) / (abs(self.improve_baseline) + 1e-5)
        value = min(value, self.config.meta.reward_max_value)
        return math.copysign(value, improve)

    def get_final_reward(self):
        assert self.best_loss < 1e10 - 1
        loss_mse = self.best_loss
        reward = self.config.meta.reward_c / loss_mse

        if self.reward_baseline is None:
            self.reward_baseline = reward
        decay = self.config.meta.reward_baseline_decay
        adv = reward - self.reward_baseline
        adv = min(adv, self.config.meta.reward_max_value)
        adv = max(adv, -self.config.meta.reward_max_value)
        # TODO(haowen) Try to use maximum instead of shift average as baseline
        # Result: doesn't seem to help too much
        # ----Shift average----
        self.reward_baseline = decay * self.reward_baseline\
            + (1 - decay) * reward
        # ----Maximum----
        #if self.reward_baseline < reward:
        #    self.reward_baseline = reward
        return reward, adv

    def get_state(self):
        abs_diff = []
        rel_diff = []
        if self.improve_baseline is None:
            ib = 1
        else:
            ib = self.improve_baseline

        for v, t in zip(self.previous_valid_loss, self.previous_train_loss):
            abs_diff.append(v - t)
            if t > 1e-6:
                rel_diff.append(v / t)
            else:
                rel_diff.append(1)

        state = ([math.log(rel_diff[-1])] + _normalize1([abs(ib)]) +
                 _normalize2(self.previous_mse_loss[-1:]) +
                 self.previous_l1_loss[-1:])
        return np.array(state, dtype='f')
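The step reward above normalizes each improvement in the validation loss by a running baseline of past improvements, compresses it with a square root, clips it, and keeps its sign. The following is a minimal, stand-alone sketch of that shaping rule; the function name `shape_step_reward` and the concrete numbers are illustrative, not taken from the repository.

import math

def shape_step_reward(improve, improve_baseline, reward_max_value, eps=1e-5):
    """Sketch of a sqrt-shaped, baseline-normalized step reward.

    improve: decrease of the validation loss at this step
    improve_baseline: running mean (or EMA) of past improvements
    """
    # Normalize by the magnitude of a typical improvement, then compress
    # with sqrt so occasional large jumps do not dominate the signal.
    value = math.sqrt(abs(improve) / (abs(improve_baseline) + eps))
    value = min(value, reward_max_value)   # clip the magnitude
    return math.copysign(value, improve)   # keep the sign of the change

# Example: an improvement twice the baseline yields roughly sqrt(2) ~ 1.41
print(shape_step_reward(0.02, 0.01, reward_max_value=5.0))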
Example #13
0
class Gan_grid(Gan):
    def __init__(self, config, exp_name='new_exp_gan_grid'):
        self.config = config
        self.graph = tf.Graph()
        self.exp_name = exp_name
        gpu_options = tf.GPUOptions(allow_growth=True)
        configProto = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
        # ----Loss_mode is only for DEBUG usage.----
        self.train_dataset = Dataset()
        self.train_dataset.load_npy(config.train_data_file)
        self.valid_dataset = Dataset()
        self.valid_dataset.load_npy(config.valid_data_file)
        self._build_placeholder()
        self._build_graph()
        self.final_hq_baseline = None  # average reward over episodes
        self.reset()
        self.fixed_noise_10000 = np.random.normal(size=(10000, config.dim_z))\
            .astype('float32')

    def reset(self):
        # ----Reset the model.----
        # TODO(haowen) The way step-number information is carried should be
        # reconsidered
        self.step_number = 0
        self.ema_gen_cost = None
        self.ema_disc_cost_real = None
        self.ema_disc_cost_fake = None
        self.prst_gen_cost = None
        self.prst_disc_cost_real = None
        self.prst_disc_cost_fake = None
        self.hq = 0
        self.entropy = 0

        # to control when to terminate the episode
        self.endurance = 0
        self.best_hq = 0
        self.hq_baseline = 0
        self.collapse = False
        self.previous_action = -1
        self.same_action_count = 0

    def _build_placeholder(self):
        with self.graph.as_default():
            dim_x = self.config.dim_x
            dim_z = self.config.dim_z
            bs = self.config.batch_size
            self.real_data = tf.placeholder(tf.float32,
                                            shape=[None, dim_x],
                                            name='real_data')
            self.noise = tf.placeholder(tf.float32,
                                        shape=[None, dim_z],
                                        name='noise')
            self.is_training = tf.placeholder(tf.bool, name='is_training')

    def _build_graph(self):
        dim_x = self.config.dim_x
        dim_z = self.config.dim_z
        batch_size = self.config.batch_size
        lr = self.config.lr_stud
        beta1 = self.config.beta1
        beta2 = self.config.beta2

        with self.graph.as_default():
            real_data = self.real_data
            fake_data = self.generator(self.noise)
            disc_real = self.discriminator(real_data)
            disc_fake = self.discriminator(fake_data, reuse=True)

            gen_cost = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=disc_fake, labels=tf.ones_like(disc_fake)))
            disc_cost_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=disc_fake, labels=tf.zeros_like(disc_fake)))
            disc_cost_real = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=disc_real, labels=tf.ones_like(disc_real)))
            disc_cost = (disc_cost_fake + disc_cost_real) / 2.

            tvars = tf.trainable_variables()
            gen_tvars = [v for v in tvars if 'Generator' in v.name]
            disc_tvars = [v for v in tvars if 'Discriminator' in v.name]

            gen_grad = tf.gradients(gen_cost, gen_tvars)
            disc_grad = tf.gradients(disc_cost, disc_tvars)
            optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                               beta1=beta1,
                                               beta2=beta2)
            gen_train_op = optimizer.apply_gradients(zip(gen_grad, gen_tvars))
            disc_train_op = optimizer.apply_gradients(
                zip(disc_grad, disc_tvars))

            self.saver = tf.train.Saver()
            self.init = tf.global_variables_initializer()
            self.fake_data = fake_data
            self.gen_train_op = gen_train_op
            self.disc_train_op = disc_train_op
            self.update = [gen_train_op, disc_train_op]

            self.gen_cost = gen_cost
            self.gen_grad = gen_grad
            self.gen_tvars = gen_tvars

            self.disc_cost_fake = disc_cost_fake
            self.disc_cost_real = disc_cost_real
            self.disc_grad = disc_grad
            self.disc_tvars = disc_tvars
            self.disc_real = disc_real
            self.disc_fake = disc_fake

    def generator(self, input):
        dim_x = self.config.dim_x
        n_hidden = self.config.n_hidden_gen
        with tf.variable_scope('Generator'):
            output = layers.linear(input, n_hidden, name='LN1', stdev=0.2)
            output = layers.batchnorm(output,
                                      is_training=self.is_training,
                                      name='BN1')
            output = tf.nn.relu(output)
            output = layers.linear(output, n_hidden, name='LN2', stdev=0.2)
            output = layers.batchnorm(output,
                                      is_training=self.is_training,
                                      name='BN2')
            output = tf.nn.relu(output)
            output = layers.linear(output, dim_x, name='LN3', stdev=0.2)
            #output = slim.fully_connected(input, n_hidden,
            #                              activation_fn=tf.nn.relu)
            #output = slim.fully_connected(output, n_hidden,
            #                              activation_fn=tf.nn.relu)
            #output = slim.fully_connected(output, dim_x, activation_fn=None)
            return output

    def discriminator(self, input, reuse=False):
        n_hidden = self.config.n_hidden_disc
        with tf.variable_scope('Discriminator') as scope:
            if reuse:
                scope.reuse_variables()
            output = layers.linear(input, n_hidden, name='LN1', stdev=0.2)
            output = tf.nn.relu(output)
            output = layers.linear(output, 1, name='LN2', stdev=0.2)
            #output = slim.fully_connected(input, n_hidden,
            #                              activation_fn=tf.nn.relu)
            #output = slim.fully_connected(output, 1, activation_fn=None)
            return tf.reshape(output, [-1])

    def train(self, save_model=False):
        sess = self.sess
        config = self.config
        batch_size = config.batch_size
        dim_z = config.dim_z
        valid_frequency = config.valid_frequency_stud
        print_frequency = config.print_frequency_stud
        best_hq = 0
        hq_baseline = 0
        best_entropy = 0
        endurance = 0
        decay = config.metric_decay
        steps_per_iteration = config.disc_iters + config.gen_iters

        for step in range(0, config.max_training_step, steps_per_iteration):
            # ----Update D network.----
            for i in range(config.disc_iters):
                data = self.train_dataset.next_batch(batch_size)
                x = data['input']
                z = np.random.normal(size=[batch_size, dim_z]).astype(
                    np.float32)
                feed_dict = {
                    self.noise: z,
                    self.real_data: x,
                    self.is_training: True
                }
                sess.run(self.disc_train_op, feed_dict=feed_dict)

            # ----Update G network.----
            for i in range(config.gen_iters):
                z = np.random.normal(size=[batch_size, dim_z]).astype(
                    np.float32)
                feed_dict = {self.noise: z, self.is_training: True}
                sess.run(self.gen_train_op, feed_dict=feed_dict)

            if step % valid_frequency == 0:
                logger.info('========Step {}========'.format(step))
                logger.info('endurance: {}'.format(endurance))
                metrics = self.get_metrics_5x5(num_batches=100)
                logger.info(metrics)
                hq = metrics[0]
                if hq_baseline > 0:
                    hq_baseline = hq_baseline * decay + hq * (1 - decay)
                else:
                    hq_baseline = hq
                logger.info('hq_baseline: {}'.format(hq_baseline))
                self.generate_plot(step)
                endurance += 1
                if hq_baseline > best_hq:
                    best_hq = hq_baseline
                    endurance = 0
                    if save_model:
                        self.save_model(step)

            if step % print_frequency == 0:
                data = self.train_dataset.next_batch(batch_size)
                x = data['input']
                z = np.random.normal(size=[batch_size, dim_z]).astype(
                    np.float32)
                feed_dict = {
                    self.noise: z,
                    self.real_data: x,
                    self.is_training: False
                }
                fetch = [
                    self.gen_cost, self.disc_cost_fake, self.disc_cost_real
                ]
                r = sess.run(fetch, feed_dict=feed_dict)
                logger.info('gen_cost: {}'.format(r[0]))
                logger.info('disc_cost fake: {}, real: {}'.format(r[1], r[2]))

            if endurance > config.max_endurance_stud:
                break
        logger.info('best_hq: {}'.format(best_hq))

    def get_state(self):
        if self.step_number == 0:
            state = [0] * self.config.dim_state_rl
        else:
            state = [
                self.step_number / self.config.max_training_step,
                math.log(self.mag_disc_grad / self.mag_gen_grad),
                self.ema_gen_cost,
                (self.ema_disc_cost_real + self.ema_disc_cost_fake) / 2,
                self.hq, self.entropy / 3.2
            ]
        return np.array(state, dtype='f')

    def check_terminate(self):
        # TODO(haowen)
        # Early stop and record the best result.
        # The episode terminates on two conditions:
        # 1) Convergence: the inception score does not improve within `endurance` steps
        # 2) Collapse: the action space collapses to a single action (not implemented yet)
        if self.same_action_count > 500:
            logger.info('Terminate reason: Collapse')
            self.collapse = True
            return True
        step = self.step_number
        if step % self.config.valid_frequency_stud == 0:
            self.endurance += 1
            metrics = self.get_metrics_5x5(100)
            hq = metrics[0]
            entropy = metrics[1]
            self.hq = hq
            self.entropy = entropy
            decay = self.config.metric_decay
            if self.hq_baseline > 0:
                self.hq_baseline = self.hq_baseline * decay + hq * (1 - decay)
            else:
                self.hq_baseline = hq
            if self.hq_baseline > self.best_hq:
                logger.info('step: {}, new best result: {}'.\
                            format(step, self.hq_baseline))
                self.best_step = self.step_number
                self.best_hq = self.hq_baseline
                self.best_entropy = entropy
                self.endurance = 0
                self.save_model(step)

        if step % self.config.print_frequency_stud == 0:
            logger.info('----Step {}----'.format(step))
            # Use the cached metrics so this branch does not raise a NameError
            # when the valid-frequency branch above did not run at this step.
            logger.info('hq: {}, entropy: {}'.format(self.hq, self.entropy))
            logger.info('hq_baseline: {}'.format(self.hq_baseline))

        if step > self.config.max_training_step:
            return True
        if self.config.stop_strategy_stud == 'prescribed_steps':
            pass
        elif self.config.stop_strategy_stud == 'exceeding_endurance' and \
                self.endurance > self.config.max_endurance_stud:
            return True
        return False

    def get_step_reward(self):
        return 0

    def get_final_reward(self):
        if self.collapse:
            return 0, -self.config.reward_max_value
        if self.best_entropy < 2.5:
            # Lost modes (mode collapse): failed trial.
            logger.info('lost modes, entropy: {}'.format(self.best_entropy))
            return 0, -self.config.reward_max_value

        hq = self.best_hq
        reward = self.config.reward_c * hq**2
        if self.final_hq_baseline is None:
            self.final_hq_baseline = hq
        baseline_hq = self.final_hq_baseline
        baseline_reward = self.config.reward_c * baseline_hq**2
        decay = self.config.inps_baseline_decay
        adv = reward - baseline_reward
        adv = min(adv, self.config.reward_max_value)
        adv = max(adv, -self.config.reward_max_value)
        # ----Shift average----
        self.final_hq_baseline = decay * self.final_hq_baseline\
            + (1 - decay) * hq
        return reward, adv

    def get_metrics_5x5(self, num_batches=100):
        all_samples = []
        config = self.config
        batch_size = 100
        dim_z = config.dim_z
        for i in range(num_batches):
            z = np.random.normal(size=[batch_size, dim_z]).astype(np.float32)
            feed_dict = {self.noise: z, self.is_training: False}
            samples = self.sess.run(self.fake_data, feed_dict=feed_dict)
            all_samples.append(samples)
        all_samples = np.concatenate(all_samples, axis=0)

        centers = []
        for i in range(5):
            for j in range(5):
                centers.append([i * 0.5 - 1, j * 0.5 - 1])
        centers = np.array(centers)
        distance = np.zeros([batch_size * num_batches, 25])
        for i in range(25):
            distance[:, i] = np.sqrt(
                np.square(all_samples[:, 0] - centers[i, 0]) +
                np.square(all_samples[:, 1] - centers[i, 1]))
        high_quality = distance < config.var_noise * 3
        count_cluster = np.sum(high_quality, 0)
        hq = np.sum(count_cluster) / (num_batches * batch_size)
        p_cluster = count_cluster / np.sum(count_cluster)
        #print('hq:', np.sum(hq))
        #print('count_cluster:', count_cluster)
        #print('p_cluster:', p_cluster)
        p_cluster += 1e-8
        entropy = -np.sum(p_cluster * np.log(p_cluster))
        #print('entropy:', entropy)
        return hq, entropy

    def get_metrics_2x2(self, num_batches=100):
        all_samples = []
        config = self.config
        batch_size = 100
        dim_z = config.dim_z
        for i in range(num_batches):
            z = np.random.normal(size=[batch_size, dim_z]).astype(np.float32)
            feed_dict = {self.noise: z, self.is_training: False}
            samples = self.sess.run(self.fake_data, feed_dict=feed_dict)
            all_samples.append(samples)
        all_samples = np.concatenate(all_samples, axis=0)

        centers = []
        for i in range(2):
            for j in range(2):
                centers.append([i * 1.0 - 0.5, j * 1.0 - 0.5])
        centers = np.array(centers)
        distance = np.zeros([batch_size * num_batches, 4])
        for i in range(4):
            distance[:, i] = np.sqrt(
                np.square(all_samples[:, 0] - centers[i, 0]) +
                np.square(all_samples[:, 1] - centers[i, 1]))
        high_quality = distance < config.var_noise * 3
        count_cluster = np.sum(high_quality, 0)
        hq = np.sum(count_cluster) / (num_batches * batch_size)
        p_cluster = count_cluster / np.sum(count_cluster)
        #print('hq:', np.sum(hq))
        print('count_cluster:', count_cluster)
        #print('p_cluster:', p_cluster)
        p_cluster += 1e-8
        entropy = -np.sum(p_cluster * np.log(p_cluster))
        #print('entropy:', entropy)
        return hq, entropy

    def generate_plot(self, step):
        feed_dict = {
            self.noise: self.fixed_noise_10000,
            self.is_training: False
        }
        samples = self.sess.run(self.fake_data, feed_dict=feed_dict)
        task_dir = os.path.join(self.config.save_images_dir, self.exp_name)
        if not os.path.exists(task_dir):
            os.mkdir(task_dir)
        save_path = os.path.join(task_dir, 'images_{}.png'.format(step))

        plt.scatter(samples[:, 0], samples[:, 1])
        # Save before show(); some backends clear the figure once it is shown.
        plt.savefig(save_path, bbox_inches='tight')
        plt.show()
        plt.close()
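get_metrics_5x5 scores the generator on a 5x5 grid of Gaussians: a sample is counted as high quality if it falls within three standard deviations of some mode center, and the entropy of the per-mode counts indicates how evenly the modes are covered. Below is a hedged, self-contained NumPy sketch of that kind of metric; the function name `grid_metrics` and the grid spacing / `var_noise` defaults are illustrative assumptions, not values from the repository.

import numpy as np

def grid_metrics(samples, grid_size=5, spacing=0.5, var_noise=0.05):
    """Sketch: high-quality-sample fraction and mode-coverage entropy.

    samples: array of shape [N, 2] drawn from the generator.
    """
    # Mode centers of a grid_size x grid_size grid centered at the origin.
    offset = spacing * (grid_size - 1) / 2.0
    centers = np.array([[i * spacing - offset, j * spacing - offset]
                        for i in range(grid_size) for j in range(grid_size)])
    # Distance from every sample to every mode center.
    dist = np.linalg.norm(samples[:, None, :] - centers[None, :, :], axis=-1)
    # A sample counts toward a mode if it lies within 3 sigma of that center.
    high_quality = dist < 3 * var_noise
    count_cluster = high_quality.sum(axis=0)
    hq = count_cluster.sum() / len(samples)
    p = count_cluster / max(count_cluster.sum(), 1) + 1e-8
    entropy = -np.sum(p * np.log(p))
    return hq, entropy

# Example with synthetic samples near only two of the 25 modes:
rng = np.random.default_rng(0)
fake = np.concatenate([rng.normal([-1.0, -1.0], 0.05, size=(500, 2)),
                       rng.normal([1.0, 1.0], 0.05, size=(500, 2))])
print(grid_metrics(fake))  # low entropy signals poor mode coverage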
Example #14
0
File: cls.py Project: safpla/autoLoss
class Cls(Basic_model):
    def __init__(self, config, exp_name='new_exp', loss_mode='1'):
        self.config = config
        self.graph = tf.Graph()
        gpu_options = tf.GPUOptions(allow_growth=True)
        configProto = tf.ConfigProto(gpu_options=gpu_options)
        self.sess = tf.InteractiveSession(config=configProto, graph=self.graph)
        self.exp_name = exp_name
        self.loss_mode = loss_mode
        train_data_file = config.train_data_file
        valid_data_file = config.valid_data_file
        test_data_file = config.test_data_file
        train_stud_data_file = config.train_stud_data_file
        self.train_dataset = Dataset()
        self.train_dataset.load_npy(train_data_file)
        self.valid_dataset = Dataset()
        self.valid_dataset.load_npy(valid_data_file)
        self.test_dataset = Dataset()
        self.test_dataset.load_npy(test_data_file)
        self.train_stud_dataset = Dataset()
        self.train_stud_dataset.load_npy(train_stud_data_file)
        self.reset()
        self._build_placeholder()
        self._build_graph()
        self.reward_baseline = None  # average reward over episodes
        self.improve_baseline = None  # average improvement over steps

    def reset(self):
        # ----Reset the model.----
        # TODO(haowen) The way step-number information is carried should be
        # reconsidered
        self.step_number = [0]
        self.previous_ce_loss = [0] * self.config.num_pre_loss
        self.previous_l1_loss = [0] * self.config.num_pre_loss
        self.previous_valid_acc = [0] * self.config.num_pre_loss
        self.previous_train_acc = [0] * self.config.num_pre_loss
        self.previous_action = [0] * self.config.dim_action_rl
        self.previous_valid_loss = [0] * self.config.num_pre_loss
        self.previous_train_loss = [0] * self.config.num_pre_loss
        self.task_dir = None

        # to control when to terminate the episode
        self.endurance = 0
        self.best_loss = 1e10
        self.best_acc = 0
        self.test_acc = 0
        self.improve_baseline = None

    def _build_placeholder(self):
        x_size = self.config.dim_input_stud
        with self.graph.as_default():
            self.x_plh = tf.placeholder(shape=[None, x_size], dtype=tf.float32)
            self.y_plh = tf.placeholder(shape=[None], dtype=tf.int32)

    def _build_graph(self):
        x_size = self.config.dim_input_stud
        h_size = self.config.dim_hidden_stud
        y_size = self.config.dim_output_stud
        lr = self.config.lr_stud

        with self.graph.as_default():
            # ----3-layer ffn----
            #hidden1 = slim.fully_connected(self.x_plh, h_size,
            #                              activation_fn=tf.nn.tanh)
            #hidden2 = slim.fully_connected(hidden1, 32,
            #                               activation_fn=tf.nn.tanh)
            #self.pred = slim.fully_connected(hidden1, y_size,
            #                                 activation_fn=None)

            w1 = weight_variable([x_size, h_size], name='w1')
            b1 = bias_variable([h_size], name='b1')
            hidden = tf.nn.relu(tf.matmul(self.x_plh, w1) + b1)

            w2 = weight_variable([h_size, y_size], name='w2')
            b2 = bias_variable([y_size], name='b2')
            self.pred = tf.matmul(hidden, w2) + b2

            # define loss
            self.loss_ce = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.y_plh, logits=self.pred, name='loss'))
            y_ = tf.argmax(self.pred, 1, output_type=tf.int32)
            correct_prediction = tf.equal(y_, self.y_plh)
            self.correct_prediction = correct_prediction
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

            tvars = tf.trainable_variables()
            self.tvars = tvars
            l1_regularizer = tf.contrib.layers.l1_regularizer(
                scale=self.config.lambda1_stud, scope=None)
            self.loss_l1 = tf.contrib.layers.apply_regularization(
                l1_regularizer, tvars)
            l2_regularizer = tf.contrib.layers.l2_regularizer(
                scale=self.config.lambda2_stud, scope=None)
            self.loss_l2 = tf.contrib.layers.apply_regularization(
                l2_regularizer, tvars)
            if self.loss_mode == '0':
                self.loss_total = self.loss_ce
                print('ce loss')
            elif self.loss_mode == '1':
                self.loss_total = self.loss_ce + self.loss_l1
                print('ce loss and l1 loss')
                print('lambda1:', self.config.lambda1_stud)
            elif self.loss_mode == '2':
                self.loss_total = self.loss_ce + self.loss_l2
                print('ce loss and l2 loss')
                print('lambda2:', self.config.lambda2_stud)
            elif self.loss_mode == '3':
                self.loss_total = self.loss_ce + self.loss_l1 + self.loss_l2
                print('ce loss, l1 loss and l2 loss')
                print('lambda1:', self.config.lambda1_stud)
                print('lambda2:', self.config.lambda2_stud)
            else:
                raise NotImplementedError

            # ----Define update operation.----
            self.update_ce = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_ce)
            self.update_l1 = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_l1)
            self.update_l2 = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_l2)
            self.update_total = tf.train.GradientDescentOptimizer(lr).\
                minimize(self.loss_total)
            self.update = [self.update_ce, self.update_l1, self.update_l2]
            self.init = tf.global_variables_initializer()

    def train(self):
        # ----Optimize total loss.----
        data = self.train_dataset.next_batch(self.config.batch_size)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_ce, self.accuracy, self.update_total]
        loss, acc, _ = self.sess.run(fetch, feed_dict=feed_dict)
        return loss, acc

    def valid(self, dataset=None):
        # ----Test on validation set.----
        if not dataset:
            dataset = self.valid_dataset
        data = dataset.next_batch(dataset.num_examples)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}
        fetch = [self.loss_ce, self.accuracy, self.pred, self.y_plh]
        [loss_ce, acc, pred, gdth] = self.sess.run(fetch, feed_dict=feed_dict)
        return loss_ce, acc, pred, gdth

    def response(self, action, lr, mode='TRAIN'):
        """ Given an action, return the new state, the reward, and whether
        the episode has terminated.

        Args:
            action: one hot encoding of actions

        Returns:
            state: shape = [dim_state_rl]
            reward: shape = [1]
            dead: boolean
        """
        sess = self.sess
        if mode == 'TRAIN':
            dataset = self.train_dataset
        else:
            dataset = self.train_dataset
        data = dataset.next_batch(self.config.batch_size)
        x = data['input']
        y = data['target']
        feed_dict = {self.x_plh: x, self.y_plh: y}

        a = np.argmax(np.array(action))
        sess.run(self.update[a], feed_dict=feed_dict)
        fetch = [self.loss_ce, self.loss_l1]
        loss_ce, loss_l1 = sess.run(fetch, feed_dict=feed_dict)
        valid_loss, valid_acc, _, _ = self.valid()
        train_loss, train_acc, _, _ = self.valid(dataset=dataset)

        # ----Update state.----
        self.previous_ce_loss = self.previous_ce_loss[1:] + [loss_ce.tolist()]
        self.previous_l1_loss = self.previous_l1_loss[1:] + [loss_l1.tolist()]
        self.previous_action = action.tolist()
        self.step_number[0] += 1
        self.previous_valid_loss = self.previous_valid_loss[1:]\
            + [valid_loss.tolist()]
        self.previous_train_loss = self.previous_train_loss[1:]\
            + [train_loss.tolist()]
        self.previous_valid_acc = self.previous_valid_acc[1:]\
            + [valid_acc.tolist()]
        self.previous_train_acc = self.previous_train_acc[1:]\
            + [train_acc.tolist()]

        reward = self.get_step_reward()
        # ----Early stop and record best result.----
        dead = self.check_terminate()
        state = self.get_state()
        return state, reward, dead

    def check_terminate(self):
        # TODO(haowen)
        # Early stop and record the best result.
        # The episode terminates on two conditions:
        # 1) Convergence: the valid loss does not improve within `endurance` steps
        # 2) Collapse: the action space collapses to a single action (not implemented yet)
        step = self.step_number[0]
        if step % self.config.valid_frequency_stud == 0:
            self.endurance += 1
            loss, acc, _, _ = self.valid()
            if acc > self.best_acc:
                self.best_step = self.step_number[0]
                self.best_loss = loss
                self.best_acc = acc
                self.endurance = 0
                _, test_acc, _, _ = self.valid(dataset=self.test_dataset)
                self.test_acc = test_acc
            if self.endurance > self.config.max_endurance_stud:
                return True
        return False

    def get_step_reward(self):
        # TODO(haowen) Use the decrease of validation loss as step reward
        if self.improve_baseline is None:
            # ----First step, nothing to compare with.----
            improve = 0.1
        else:
            improve = (self.previous_valid_loss[-2] -
                       self.previous_valid_loss[-1])

        # TODO(haowen) Try to use sqrt function instead of sign function
        # ----With baseline.----
        if self.improve_baseline is None:
            self.improve_baseline = improve
        decay = self.config.reward_baseline_decay
        self.improve_baseline = decay * self.improve_baseline\
            + (1 - decay) * improve

        #TODO(haowen) Remove nonlinearity
        value = math.sqrt(abs(improve) / (abs(self.improve_baseline) + 1e-5))
        #value = abs(improve) / (abs(self.improve_baseline) + 1e-5)
        value = min(value, self.config.reward_max_value)
        return math.copysign(value, improve) * self.config.reward_step_rl
        # ----Without baseline.----
        #return math.copysign(math.sqrt(abs(improve)), improve)

        # TODO(haowen) This reward design may cause an imbalance because
        # positive rewards naturally occur more often than negative ones
        #if abs(improve) < 1e-5:
        #    return 0    # no reward if the difference is too small
        #elif improve > 0:
        #    # TODO(haowen) Try not to give reward to the reduce of loss
        #    # This reward will strengthen penalty and weaken reward
        #    return self.config.reward_step_rl
        #    #return 0
        #else:
        #    return -self.config.reward_step_rl

    def get_final_reward(self):
        acc = max(self.best_acc, 1 / self.config.dim_output_stud)
        reward = -self.config.reward_c / acc

        if self.reward_baseline is None:
            self.reward_baseline = reward
        decay = self.config.reward_baseline_decay
        adv = reward - self.reward_baseline
        adv = min(adv, self.config.reward_max_value)
        adv = max(adv, -self.config.reward_max_value)
        # ----Shift average----
        self.reward_baseline = decay * self.reward_baseline\
            + (1 - decay) * reward
        return reward, adv

    def get_state(self):
        abs_diff = []
        rel_diff = []
        if self.improve_baseline is None:
            ib = 1
        else:
            ib = self.improve_baseline

        for v, t in zip(self.previous_valid_loss, self.previous_train_loss):
            abs_diff.append(v - t)
            if t > 1e-6:
                rel_diff.append(v / t)
            else:
                rel_diff.append(1)

        state = (
            rel_diff[-1:] + _normalize1([abs(ib)]) +
            _normalize2(self.previous_ce_loss[-1:]) +
            _normalize2(self.previous_l1_loss[-1:]) +
            self.previous_train_acc[-1:] +
            [self.previous_train_acc[-1] - self.previous_valid_acc[-1]] +
            self.previous_valid_acc[-1:])
        return np.array(state, dtype='f')
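Both models turn the episode-level reward into an advantage by subtracting an exponential-moving-average ("shift average") baseline and clipping the result, which keeps the policy-gradient signal bounded across episodes. The following is a minimal sketch of that pattern; the class name `EmaBaseline` and the default constants are illustrative and not part of the repository.

class EmaBaseline:
    """Sketch of a clipped-advantage computation with a shift-average baseline."""

    def __init__(self, decay=0.9, max_value=5.0):
        self.decay = decay
        self.max_value = max_value
        self.baseline = None

    def advantage(self, reward):
        # First episode: the baseline starts at the observed reward,
        # so the initial advantage is zero.
        if self.baseline is None:
            self.baseline = reward
        adv = reward - self.baseline
        adv = max(min(adv, self.max_value), -self.max_value)  # clip
        # Shift-average update of the baseline for the next episode.
        self.baseline = self.decay * self.baseline + (1 - self.decay) * reward
        return adv

# Example: a run of episode rewards produces bounded advantages.
b = EmaBaseline(decay=0.8, max_value=2.0)
for r in [1.0, 1.5, 0.5, 3.0]:
    print(round(b.advantage(r), 3))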