def _build_train(self):
        print("-" * 80)
        print("Build train graph")
        logits = self._model(self.x_train, is_training=True)
        # CIFAR-10 to chess modification: the original sparse softmax
        # cross-entropy loss is replaced by a regression loss.
        # log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        #     logits=logits, labels=self.y_train)
        # log_probs = tf.keras.backend.categorical_crossentropy(
        #     target=self.y_train, output=logits, axis=1)
        # tf.keras.losses.MSE expects (y_true, y_pred); the order below follows
        # that convention (MSE itself is symmetric).
        log_probs = tf.keras.losses.MSE(self.y_train, logits)

        self.loss = tf.reduce_mean(log_probs)

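        # train_preds is cast to float32, presumably because y_train holds
        # float-valued chess targets here; train_acc is then the count of
        # exact matches in the batch, not a rate.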
        self.train_preds = tf.argmax(logits, axis=1)
        self.train_preds = tf.cast(self.train_preds, tf.float32)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.cast(self.train_acc, tf.int32)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("Model has {} params".format(self.num_vars))

        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            lr_cosine=self.lr_cosine,
            lr_max=self.lr_max,
            lr_min=self.lr_min,
            lr_T_0=self.lr_T_0,
            lr_T_mul=self.lr_T_mul,
            num_train_batches=self.num_train_batches,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas,
        )
Example #2
    def _build_train(self):
        print("-" * 80)
        print("Build train graph")
        logits = self._model(self.x_train, is_training=True)
        log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y_train)
        self.loss = tf.reduce_mean(log_probs)

        #self._weight_transfer_loss()

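        # Auxiliary classifier head (as in Inception/NASNet): its loss is
        # added with weight 0.4 to strengthen gradient flow in deep networks.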
        if self.use_aux_heads:
            log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.aux_logits, labels=self.y_train)
            self.aux_loss = tf.reduce_mean(log_probs)
            train_loss = self.loss + 0.4 * self.aux_loss
        else:
            train_loss = self.loss

        self.train_preds = tf.argmax(logits, axis=1)
        self.train_preds = tf.to_int32(self.train_preds)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.to_int32(self.train_acc)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if (var.name.startswith(self.name) and "aux_head" not in var.name)
        ]
        for var in tf_variables:
            print(var)
        self.num_vars = count_model_params(tf_variables)
        print("Model has {0} params".format(self.num_vars))

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            train_loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            lr_cosine=self.lr_cosine,
            lr_max=self.lr_max,
            lr_min=self.lr_min,
            lr_T_0=self.lr_T_0,
            lr_T_mul=self.lr_T_mul,
            num_train_batches=self.num_train_batches,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #3
    def build_trainer(self):
        self.valid_acc = tf.placeholder(dtype=tf.float32, shape=[])
        mask = tf.placeholder(dtype=tf.bool, shape=[self.batch_size])
        self.cur_sample_entropy = tf.boolean_mask(
            self.sample_entropy, mask)[0]
        self.cur_sample_log_prob = tf.boolean_mask(
            self.sample_log_prob, mask)[0]
        self.cur_skip_count = tf.boolean_mask(self.skip_count, mask)[0]
        self.cur_skip_penaltys = tf.boolean_mask(
            self.skip_penaltys, mask)[0]

        reward = self.valid_acc

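        # num_layers * (num_layers - 1) / 2 is the number of layer pairs, so
        # skip_rate is the fraction of possible skip connections actually used.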
        normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
        self.skip_rate = tf.to_float(self.cur_skip_count) / normalize

        if self.entropy_weight is not None:
            reward += self.entropy_weight * self.cur_sample_entropy

        self.cur_sample_log_prob = tf.reduce_sum(self.cur_sample_log_prob)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
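        # Exponential-moving-average baseline for REINFORCE:
        # baseline <- bl_dec * baseline + (1 - bl_dec) * reward.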
        baseline_update = tf.assign_sub(
            self.baseline, (1 - self.bl_dec) * (self.baseline - reward))

        with tf.control_dependencies([baseline_update]):
            reward = tf.identity(reward)

        self.loss = self.cur_sample_log_prob * (reward - self.baseline)
        if self.skip_weight is not None:
            self.loss += self.skip_weight * self.cur_skip_penaltys

        self.train_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name="train_step")
        tf_variables = [var
                        for var in tf.trainable_variables() if var.name.startswith(self.name)]
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=False,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #4
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        self.valid_acc = tf.cast(
            child_model.valid_shuffle_acc, tf.float32) / tf.cast(
                child_model.batch_size, tf.float32)
        self.reward = self.valid_acc

        normalize = tf.cast(self.num_layers * (self.num_layers - 1) / 2,
                            tf.float32)
        self.skip_rate = tf.cast(self.skip_count, tf.float32) / normalize

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

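        # REINFORCE: sample_log_prob is built from cross-entropy ops in the
        # sampler (negative log-probabilities), so minimizing this loss raises
        # the probability of architectures whose reward beats the baseline.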
        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        if self.skip_weight is not None:
            self.loss += self.skip_weight * self.skip_penaltys

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")
        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas,
        )
Example #5
    def _build_train(self):
        print("-" * 80)
        print("Build train graph")
        self.output, self.layers = output, layers = self._model(
            self.x_train, is_training=True)
        # update loss to SSE
        label_onehot = tf.cast(tf.one_hot(self.y_train, 10), tf.float32)
        with tf.name_scope('loss'):
            # TODO: change to reduce_mean?
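            # 0.5 * SSE: the 0.5 cancels the 2 from differentiating the
            # square, so d(loss)/d(output) = output - label_onehot.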
            self.loss = 0.5 * tf.reduce_sum(tf.square(label_onehot - output))

        self.train_preds = tf.argmax(output, axis=1)
        self.train_preds = tf.to_int32(self.train_preds)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.to_int32(self.train_acc)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("Model has {} params".format(self.num_vars))

        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.grads, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            lr_cosine=self.lr_cosine,
            lr_max=self.lr_max,
            lr_min=self.lr_min,
            lr_T_0=self.lr_T_0,
            lr_T_mul=self.lr_T_mul,
            num_train_batches=self.num_train_batches,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas,
            bitsW=self.bitsW,
            bitsG=self.bitsG,
            is_child=True)
Example #6
  def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    output = self._model(self.x_train, is_training=True)
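    # y_train appears to hold 8-bit pixel values; shift and scale from
    # [0, 255] to roughly [-1, 1] to match the network's output range.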
    target = (self.y_train - 127) / 127
    self.loss = tf.reduce_mean(
      tf.losses.absolute_difference(target, output))
    train_loss = self.loss

    self.train_psnr = psnr(self.y_train, output)

    tf.summary.scalar('loss', self.loss)
    output = output * 127 + 127
    output = tf.clip_by_value(output, 0, 255)
    input_img = self.x_train*127 + 127
    bicubic_img = tf.image.resize_bicubic(input_img, [128, 128])
    tf.summary.image("output", tf.cast(output, tf.uint8))
    tf.summary.image("target", tf.cast(self.y_train, tf.uint8))
    tf.summary.image("input", tf.cast(input_img, tf.uint8))
    tf.summary.image("bicubic", tf.cast(bicubic_img, tf.uint8))

    tf_variables = [
      var for var in tf.trainable_variables() if (
        var.name.startswith(self.name) and "aux_head" not in var.name)]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {0} params".format(self.num_vars))

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
      train_loss,
      tf_variables,
      self.global_step,
      clip_mode=self.clip_mode,
      grad_bound=self.grad_bound,
      l2_reg=self.l2_reg,
      lr_init=self.lr_init,
      lr_dec_start=self.lr_dec_start,
      lr_dec_every=self.lr_dec_every,
      lr_dec_rate=self.lr_dec_rate,
      lr_cosine=self.lr_cosine,
      lr_max=self.lr_max,
      lr_min=self.lr_min,
      lr_T_0=self.lr_T_0,
      lr_T_mul=self.lr_T_mul,
      num_train_batches=self.num_train_batches,
      optim_algo=self.optim_algo,
      sync_replicas=self.sync_replicas,
      num_aggregate=self.num_aggregate,
      num_replicas=self.num_replicas)

    tf.summary.scalar('lr', self.lr)
    self.summaries = tf.summary.merge_all()
Example #7
    def _build_train(self):
        print("Build train graph")
        all_h, self.train_reset = self._model(self.x_train, True, False)
        log_probs = self._get_log_probs(all_h,
                                        self.y_train,
                                        batch_size=self.batch_size,
                                        is_training=True)
        self.loss = tf.reduce_sum(log_probs) / tf.to_float(self.batch_size)
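        # Assuming _get_log_probs returns per-token negative log-likelihoods,
        # train_ppl is the usual perplexity exp(mean NLL).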
        self.train_ppl = tf.exp(tf.reduce_mean(log_probs))

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("-" * 80)
        print("Model has {} parameters".format(self.num_vars))

        loss = self.loss
        if self.rnn_l2_reg is not None:
            loss += (self.rnn_l2_reg * tf.reduce_sum(all_h**2) /
                     tf.to_float(self.batch_size))
        if self.rnn_slowness_reg is not None:
            loss += (self.rnn_slowness_reg * self.all_h_diff /
                     tf.to_float(self.batch_size))
        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        (self.train_op, self.lr, self.grad_norm, self.optimizer,
         self.grad_norms) = get_train_ops(
             loss,
             tf_variables,
             self.global_step,
             clip_mode=self.clip_mode,
             grad_bound=self.grad_bound,
             l2_reg=self.l2_reg,
             lr_warmup_val=self.lr_warmup_val,
             lr_warmup_steps=self.lr_warmup_steps,
             lr_init=self.lr_init,
             lr_dec_start=self.lr_dec_start,
             lr_dec_every=self.lr_dec_every,
             lr_dec_rate=self.lr_dec_rate,
             lr_dec_min=self.lr_dec_min,
             optim_algo=self.optim_algo,
             moving_average=self.optim_moving_average,
             sync_replicas=self.sync_replicas,
             num_aggregate=self.num_aggregate,
             num_replicas=self.num_replicas,
             get_grad_norms=True,
         )
Example #8
    def _build_train(self):
        print("-" * 80)
        print("Build train graph")
        logits = self._model(self.x_train, is_training=True)
        log_probs = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=self.y_train)
        self.loss = tf.reduce_mean(log_probs)

        outs = tf.nn.sigmoid(logits)
        self.train_preds = tf.greater_equal(outs, tf.constant(0.5))
        self.train_preds = tf.to_int32(self.train_preds)
        self.y_train = tf.to_int32(self.y_train)
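        # Multi-label setup: count how many of the 6 binary labels match per
        # example; an example counts as correct only when all 6 match
        # (exact-match accuracy).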
        self.soft_acc_count = tf.count_nonzero(tf.equal(
            self.train_preds, self.y_train),
                                               axis=1)
        self.train_acc = tf.to_int32(tf.equal(self.soft_acc_count, 6))
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("Model has {} params".format(self.num_vars))

        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            lr_cosine=self.lr_cosine,
            lr_max=self.lr_max,
            lr_min=self.lr_min,
            lr_T_0=self.lr_T_0,
            lr_T_mul=self.lr_T_mul,
            num_train_batches=self.num_train_batches,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #9
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        self.valid_PSNR = tf.placeholder(dtype=tf.float32)
        self.reward = self.valid_PSNR
        # self.reward = tf.Print(self.reward, [self.reward], message="reward of this batch : ")

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        # self.sample_log_prob = tf.Print(self.sample_log_prob,[self.sample_log_prob], message="sample_log_prob of this batch : ")
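        # The baseline is fed in from the training loop via a placeholder
        # rather than tracked in-graph; the commented-out assign_sub below is
        # the moving-average variant used in the other examples.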
        self.baseline = tf.placeholder(dtype=tf.float32)
        # baseline_update = tf.assign_sub(
        #     self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))
        #
        # with tf.control_dependencies([baseline_update]):
        #     self.reward = tf.identity(self.reward)

        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        # self.loss = tf.Print(self.loss,[self.loss], message="loss of this batch : ")
        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer, self.grads = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)

        self.skip_rate = tf.constant(0.0, dtype=tf.float32)
Example #10
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        self.reward = tf.to_float(child_model.accuracy)

        if self.entropy_weight:
            self.reward += self.entropy_weight * self.sample_entropy

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        self.baseline = tf.Variable(0.0,
                                    dtype=tf.float32,
                                    trainable=False,
                                    name='baseline')
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name='train_step')

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]

        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)

        self.skip_rate = tf.constant(0.0, dtype=tf.float32)
Example #11
    def build_trainer(self, child_model):
        # actor
        self.valid_loss = tf.to_float(child_model.rl_loss)
        self.valid_loss = tf.stop_gradient(self.valid_loss)
        self.valid_loss = tf.minimum(self.valid_loss, 10.0)
        self.valid_ppl = tf.exp(self.valid_loss)
        self.reward = 80.0 / self.valid_ppl

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        # or baseline
        self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)
        self.loss = self.sample_log_probs * (self.reward - self.baseline)

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")
        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #12
    def _build_train(self):
        print("Build train graph")
        if self.use_model == "SRCNN":
            self.train_preds = self._model_srcnn(self.x_train, True)
        elif self.use_model == "RDN":
            self.train_preds = self._model_RDN(self.x_train, True)
        else:
            self.train_preds = self._model(self.x_train, True)
        self.loss = tf.losses.mean_squared_error(labels=self.y_train,
                                                 predictions=self.train_preds)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_warmup_steps=self.lr_warmup_steps,
            lr_warmup_val=self.lr_warmup_val,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo
            # sync_replicas=self.sync_replicas,
            # num_aggregate=self.num_aggregate,
            # num_replicas=self.num_replicas
        )
Example #13
    def _build_train(self):
        print "Build train graph"
        logits = self._model(self.x_train, True)
        log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y_train)
        self.loss = tf.reduce_mean(log_probs)

        self.train_preds = tf.argmax(logits, axis=1)
        self.train_preds = tf.to_int32(self.train_preds)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.to_int32(self.train_acc)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print "-" * 80
        for var in tf_variables:
            print var

        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #14
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        lookup = tf.Variable([9., 25., 9., 3., 1.])

        self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                          tf.to_float(child_model.batch_size))

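        # sample_arc stores (input index, op id) pairs, so op ids sit at the
        # odd positions idx * 2 + 1; gather them to look up each op's latency.
        # (A tf.constant would arguably suit the fixed lookup table above
        # better than a trainable tf.Variable.)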
        res = tf.reshape(self.sample_arc[0][1], [1])
        for idx in range(1, self.num_cells):
            res = tf.concat(
                [res, tf.reshape(self.sample_arc[0][idx * 2 + 1], [1])],
                axis=0)
        operators_cell = tf.convert_to_tensor(res, dtype=tf.int32)
        latency_cell = tf.gather(lookup, operators_cell)
        latency_cell = tf.reduce_sum(latency_cell)

        res2 = tf.reshape(self.sample_arc[1][1], [1])
        for idx in range(1, self.num_cells):
            res2 = tf.concat(
                [res2, tf.reshape(self.sample_arc[1][idx * 2 + 1], [1])],
                axis=0)
        operators_redu = tf.convert_to_tensor(res2, dtype=tf.int32)
        latency_redu = tf.gather(lookup, operators_redu)
        latency_redu = tf.reduce_sum(latency_redu)
        latency_sum = tf.math.add(latency_cell, latency_redu)
        alpha = tf.to_float(0.)
        beta = tf.to_float(-1.)
        threshold = tf.to_float(140.)
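        # Piecewise latency factor: latency^0 = 1 below the threshold (no
        # penalty), latency^-1 above it, so slow architectures are
        # down-weighted.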
        latency_val = tf.cond(tf.math.greater(threshold, latency_sum),
                              lambda: tf.math.pow(latency_sum, alpha),
                              lambda: tf.math.pow(latency_sum, beta))
        self.latency_sum = latency_sum

        if not self.multi_objective:
            self.reward = self.valid_acc
        else:
            # multi-objective reward: accuracy scaled by the latency factor
            self.reward = self.valid_acc * latency_val

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)

        self.skip_rate = tf.constant(0.0, dtype=tf.float32)
Example #15
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                          tf.to_float(child_model.batch_size))
        self.reward = self.valid_acc

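        # Critic: a linear value function over the controller's hidden states,
        # trained below to regress the reward (advantage = V - R).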
        all_h = tf.concat(self.all_h, axis=0)
        value_function = tf.matmul(all_h, self.w_critic)
        advantage = value_function - self.reward
        critic_loss = tf.reduce_sum(advantage**2)

        critic_train_step = tf.Variable(0,
                                        dtype=tf.int32,
                                        trainable=False,
                                        name="critic_train_step")
        critic_train_op, _, _, _ = get_train_ops(critic_loss, [self.w_critic],
                                                 critic_train_step,
                                                 clip_mode=None,
                                                 lr_init=1e-3,
                                                 lr_dec_start=0,
                                                 lr_dec_every=int(1e9),
                                                 optim_algo="adam",
                                                 sync_replicas=False)

        normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
        self.skip_rate = tf.to_float(self.skip_count) / normalize

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

        # self.loss = self.sample_log_prob * (self.reward - self.baseline)
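        # Rather than plain REINFORCE, weight the log-probs by the normalized
        # visit counts N of the search-tree children (an AlphaZero-style
        # policy target built from self.root_node).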
        search_probs = np.array(
            [child["N"] for child in self.root_node["children"]],
            dtype=np.float32)  # float dtype so the in-place division works
        search_probs /= search_probs.sum()
        search_probs = np.expand_dims(search_probs, -1)
        search_probs = tf.convert_to_tensor(search_probs, dtype=tf.float32)
        self.sample_log_prob = tf.expand_dims(self.sample_log_prob, axis=0)
        self.loss = tf.matmul(search_probs, self.sample_log_prob)
        if self.skip_weight is not None:
            self.loss += self.skip_weight * self.skip_penaltys

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")
        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print "-" * 80
        for var in tf_variables:
            print var

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #16
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                          tf.to_float(child_model.batch_size))
        self.reward = self.valid_acc

        normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
        self.skip_rate = tf.to_float(self.skip_count) / normalize

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

        # Scale the client online cost and the linear cost into [0.1, 1)
        # by dividing each by 10^(its number of decimal digits).
        d1 = tf.floor(
            tf.log(child_model.client_online_cost) / tf.log(10.0)) + 1
        self.client_cost = child_model.client_online_cost / 10**d1

        d2 = tf.floor(tf.log(child_model.linear_cost) / tf.log(10.0)) + 1
        self.linear_cost = child_model.linear_cost / 10**d2

        # modify the reward
        #self.reward -= 10*self.skip_rate
        #self.reward -= child_model.linear_cost/100000000.0
        #self.reward -= 0.01*(child_model.client_online_cost/(64*24*10000) + 32.8)

        # client online cost
        #self.reward -= 3*0.0357*(child_model.client_online_cost*0.187382/3225600+13.86)
        #self.reward -= 3*0.0094*(child_model.client_online_cost*0.187382/3225600+52.393)
        #self.reward -= 3*0.0025*(child_model.client_online_cost*0.187382/3225600+197.73)

        # linear cost
        #self.reward -= 3*self.linear_cost

        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        # apply the skip penalty only when skip connections are enabled
        if not self.no_skip:
            if self.skip_weight is not None:
                self.loss += self.skip_weight * self.skip_penaltys

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")
        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print "-" * 80
        for var in tf_variables:
            print var

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Example #17
  def build_trainer(self, child_model):
    # actor
    child_model.build_valid_rl()
    self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                      tf.to_float(child_model.batch_size))
    self.reward = self.valid_acc

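    # Two variants: a learned linear critic (actor-critic) or an exponential
    # moving-average baseline (plain REINFORCE).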
    if self.use_critic:
      # critic
      all_h = tf.concat(self.all_h, axis=0)
      value_function = tf.matmul(all_h, self.w_critic)
      advantage = value_function - self.reward
      critic_loss = tf.reduce_sum(advantage ** 2)
      self.baseline = tf.reduce_mean(value_function)
      self.loss = -tf.reduce_mean(self.sample_log_probs * advantage)

      critic_train_step = tf.Variable(
          0, dtype=tf.int32, trainable=False, name="critic_train_step")
      critic_train_op, _, _, _ = get_train_ops(
        critic_loss,
        [self.w_critic],
        critic_train_step,
        clip_mode=None,
        lr_init=1e-3,
        lr_dec_start=0,
        lr_dec_every=int(1e9),
        optim_algo="adam",
        sync_replicas=False)
    else:
      # or baseline
      self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
      self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
      baseline_update = tf.assign_sub(
        self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))
      with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)
      self.loss = self.sample_log_probs * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")
    tf_variables = [var for var in tf.trainable_variables()
                    if var.name.startswith(self.name)
                      and "w_critic" not in var.name]
    print ("-" * 80)
    for var in tf_variables:
      print (var)
    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
      self.loss,
      tf_variables,
      self.train_step,
      clip_mode=self.clip_mode,
      grad_bound=self.grad_bound,
      l2_reg=self.l2_reg,
      lr_init=self.lr_init,
      lr_dec_start=self.lr_dec_start,
      lr_dec_every=self.lr_dec_every,
      lr_dec_rate=self.lr_dec_rate,
      optim_algo=self.optim_algo,
      sync_replicas=self.sync_replicas,
      num_aggregate=self.num_aggregate,
      num_replicas=self.num_replicas)

    if self.use_critic:
      self.train_op = tf.group(self.train_op, critic_train_op)
Example #18
class PTBEnasController(object):
  def __init__(self,
               rhn_depth=5,
               lstm_size=32,
               lstm_num_layers=2,
               lstm_keep_prob=1.0,
               tanh_constant=None,
               temperature=None,
               num_funcs=2,
               lr_init=1e-3,
               lr_dec_start=0,
               lr_dec_every=100,
               lr_dec_rate=0.9,
               l2_reg=0,
               entropy_weight=None,
               clip_mode=None,
               grad_bound=None,
               bl_dec=0.999,
               optim_algo="adam",
               sync_replicas=False,
               num_aggregate=None,
               num_replicas=None,
               name="controller"):

    print("-" * 80)
    print("Building PTBEnasController")

    self.rhn_depth = rhn_depth
    self.lstm_size = lstm_size
    self.lstm_num_layers = lstm_num_layers 
    self.lstm_keep_prob = lstm_keep_prob
    self.tanh_constant = tanh_constant
    self.temperature = temperature
    self.num_funcs = num_funcs
    self.lr_init = lr_init
    self.lr_dec_start = lr_dec_start
    self.lr_dec_every = lr_dec_every
    self.lr_dec_rate = lr_dec_rate
    self.l2_reg = l2_reg
    self.entropy_weight = entropy_weight
    self.clip_mode = clip_mode
    self.grad_bound = grad_bound
    self.bl_dec = bl_dec
    self.optim_algo = optim_algo
    self.sync_replicas = sync_replicas
    self.num_aggregate = num_aggregate
    self.num_replicas = num_replicas
    self.name = name

    self._create_params()
    self._build_sampler()

  def _create_params(self):
    # Initialize all parameters from a uniform(-0.1, 0.1) distribution.
    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
    with tf.variable_scope(self.name, initializer=initializer):
      with tf.variable_scope("lstm"):
        self.w_lstm = []
        for layer_id in range(self.lstm_num_layers):
          with tf.variable_scope("layer_{}".format(layer_id)):
            w = tf.get_variable("w", [2 * self.lstm_size, 4 * self.lstm_size])
            self.w_lstm.append(w)

      num_funcs = self.num_funcs
      with tf.variable_scope("embedding"):
        self.g_emb = tf.get_variable("g_emb", [1, self.lstm_size])
        self.w_emb = tf.get_variable("w", [num_funcs, self.lstm_size])

      with tf.variable_scope("softmax"):
        self.w_soft = tf.get_variable("w", [self.lstm_size, num_funcs])

      with tf.variable_scope("attention"):
        self.attn_w_1 = tf.get_variable("w_1", [self.lstm_size, self.lstm_size])
        self.attn_w_2 = tf.get_variable("w_2", [self.lstm_size, self.lstm_size])
        self.attn_v = tf.get_variable("v", [self.lstm_size, 1])

  def _build_sampler(self):
    """Build the sampler ops and the log_prob ops."""

    arc_seq = []
    sample_log_probs = []
    sample_entropy = []
    all_h = []
    all_h_w = []

    # sampler ops
    inputs = self.g_emb
    prev_c, prev_h = [], []
    # prev_c and prev_h each hold lstm_num_layers zero states of shape
    # [1, lstm_size].
    for _ in range(self.lstm_num_layers):
      prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))
      prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))

    # used = tf.zeros([self.rhn_depth, 2], dtype=tf.int32)
    for layer_id in range(self.rhn_depth):
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      all_h.append(next_h[-1])
      all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1))

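      # Attention over earlier steps' hidden states picks which previous layer
      # to read from; the squared-distance term below biases the choice toward
      # nearby layers.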
      if layer_id > 0:
        query = tf.matmul(next_h[-1], self.attn_w_2)
        query = query + tf.concat(all_h_w[:-1], axis=0)
        query = tf.tanh(query)
        logits = tf.matmul(query, self.attn_v)
        logits = tf.reshape(logits, [1, layer_id])

        if self.temperature is not None:
          logits /= self.temperature
        if self.tanh_constant is not None:
          logits = self.tanh_constant * tf.tanh(logits)
        diff = tf.to_float(layer_id - tf.range(0, layer_id)) ** 2
        logits -= tf.reshape(diff, [1, layer_id]) / 6.0

        skip_index = tf.multinomial(logits, 1)
        skip_index = tf.to_int32(skip_index)
        skip_index = tf.reshape(skip_index, [1])
        arc_seq.append(skip_index)

        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=skip_index)
        sample_log_probs.append(log_prob)

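        # log_prob is a cross-entropy, i.e. -log p, so log_prob * exp(-log_prob)
        # = -p * log p: this sample's contribution to the policy entropy.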
        entropy = log_prob * tf.exp(-log_prob)
        sample_entropy.append(tf.stop_gradient(entropy))

        inputs = tf.nn.embedding_lookup(
          tf.concat(all_h[:-1], axis=0), skip_index)
        inputs /= (0.1 + tf.to_float(layer_id - skip_index))
      else:
        inputs = self.g_emb

      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      logits = tf.matmul(next_h[-1], self.w_soft)
      if self.temperature is not None:
        logits /= self.temperature
      if self.tanh_constant is not None:
        logits = self.tanh_constant * tf.tanh(logits)
      func = tf.multinomial(logits, 1)
      func = tf.to_int32(func)
      func = tf.reshape(func, [1])
      arc_seq.append(func)
      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=func)
      sample_log_probs.append(log_prob)
      entropy = log_prob * tf.exp(-log_prob)
      sample_entropy.append(tf.stop_gradient(entropy))
      inputs = tf.nn.embedding_lookup(self.w_emb, func)

    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = arc_seq

    self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
    self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

    sample_entropy = tf.concat(sample_entropy, axis=0)
    self.sample_entropy = tf.reduce_sum(sample_entropy)

    self.all_h = all_h

  # Builds the reward computation and creates the controller's training ops.
  def build_trainer(self, child_model):
    # actor
    # tf.to_float casts the tensor to float32; the controller's reward is
    # derived from the child model's rl_loss.
    self.valid_loss = tf.to_float(child_model.rl_loss)
    # tf.stop_gradient blocks backpropagation through valid_loss.
    self.valid_loss = tf.stop_gradient(self.valid_loss)
    # PPL = e^valid_loss.
    self.valid_ppl = tf.exp(self.valid_loss)
    # reward = 80 / ppl.
    self.reward = 80.0 / self.valid_ppl

    # entropy_weight scales an entropy bonus that encourages exploration.
    if self.entropy_weight is not None:
      self.reward += self.entropy_weight * self.sample_entropy

    # or baseline
    self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_update = tf.assign_sub(
      self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))

    # Run baseline_update before the reward is read.
    with tf.control_dependencies([baseline_update]):
      self.reward = tf.identity(self.reward)

    # loss = sample_log_probs * (reward - baseline)
    self.loss = self.sample_log_probs * (self.reward - self.baseline)

    # train_step counts controller updates (used by the lr schedule).
    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    # tf_variables holds every trainable variable whose name starts with
    # self.name (default "controller").
    tf_variables = [var
        for var in tf.trainable_variables() if var.name.startswith(self.name)]

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
      self.loss,
      tf_variables,
      self.train_step,
      clip_mode=self.clip_mode,
      grad_bound=self.grad_bound,
      l2_reg=self.l2_reg,
      lr_init=self.lr_init,
      lr_dec_start=self.lr_dec_start,
      lr_dec_every=self.lr_dec_every,
      lr_dec_rate=self.lr_dec_rate,
      optim_algo=self.optim_algo,
      sync_replicas=self.sync_replicas,
      num_aggregate=self.num_aggregate,
      num_replicas=self.num_replicas)