コード例 #1
0
ファイル: utils.py プロジェクト: RuiShu/bcde
def build_optimizer(loss, update_ops=[], scope=None, reuse=None):
    """Build the training op that minimizes ``loss`` with clipped gradients.

    The optimizer is ``AdamaxOptimizer`` when ``args.adamax`` is truthy and
    ``AdamOptimizer`` otherwise; both are constructed with ``args.lr``.

    Args:
        loss: Value handed to ``clip_gradients`` as the objective.
        update_ops: Passed as-is to ``tf.control_dependencies`` so the
            gradient application is created under those dependencies.
            The list default is never mutated here; it is kept unchanged
            so existing callers see the exact same argument handling.
        scope: Forwarded to ``tf.variable_scope`` (default name
            ``'gradients'`` is used when this is ``None``).
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Whatever ``optimizer.apply_gradients`` returns for the clipped
        gradients.
    """
    with tf.variable_scope(scope, 'gradients', reuse=reuse):
        # Parenthesized form prints the same text on Python 2 and is valid
        # syntax on Python 3 (the bare print statement is not).
        print("Building optimizer")
        if args.adamax:
            optimizer = AdamaxOptimizer(args.lr)
        else:
            optimizer = AdamOptimizer(args.lr)
        # max clip and max norm hyperparameters from Sonderby's LVAE code
        # The second value returned by clip_gradients is not needed here.
        clipped, _ = clip_gradients(optimizer,
                                    loss,
                                    max_clip=0.9,
                                    max_norm=4)
        with tf.control_dependencies(update_ops):
            train_step = optimizer.apply_gradients(clipped)
    return train_step
コード例 #2
0
ファイル: DNGPU_model.py プロジェクト: power-hunger/DNGPU
    def createGraph(self):
        """Creates graph for training.

        For each pair taken from ``self.bins`` and ``self.count_list`` this
        creates an int32 and an int64 placeholder of shape
        ``[itemCount, seqLength]``, appends them to ``self.x_input`` and
        ``self.y_input``, and calls ``self.createLoss`` on them. The
        per-bin results are accumulated into:

        * ``self.base_cost``  -- weighted sum of the per-bin costs divided
          by the total weight,
        * ``self.accuracy``   -- sum of the per-bin accuracies divided by
          the number of bins,
        * ``self.bin_losses`` -- list of the per-item costs of every bin,
        * ``self.cost_list``  -- stacked per-bin costs,
        * ``self.sat_loss``   -- summed saturation loss scaled by
          ``self.saturation_weight`` and divided by the total weight.

        Gradients of ``base_cost + sat_loss`` with respect to all trainable
        variables get truncated-normal noise scaled by
        ``self.learning_rate * 1e-4`` and are applied with
        ``AdamaxOptimizer``; the resulting op is stored in
        ``self.optimizer``. ``self.gnorm`` is the global norm of the
        optimizer's ``"m"`` slots.
        """
        self.base_cost = 0.0
        self.accuracy = 0
        num_sizes = len(self.bins)
        self.cost_list = []
        sum_weight = 0
        self.bin_losses = []
        saturation_loss = []

        # Create all bins and calculate losses for them

        with vs.variable_scope("var_lengths"):
            for seqLength, itemCount in zip(self.bins, self.count_list):
                x_in = tf.placeholder("int32", [itemCount, seqLength])
                y_in = tf.placeholder("int64", [itemCount, seqLength])
                self.x_input.append(x_in)
                self.y_input.append(y_in)
                # Reset before createLoss; presumably createLoss fills this
                # list for the current bin -- confirm against createLoss.
                self.saturation_costs = []
                c, a, _, _, perItemCost, _ = self.createLoss(
                    x_in, y_in, seqLength)

                # Every bin currently has the same weight (a 1/seqLength
                # weighting was considered by the original author).
                weight = 1.0
                sat_cost = tf.add_n(self.saturation_costs) / (
                    (seqLength**2) * itemCount)
                saturation_loss.append(sat_cost * weight)
                self.bin_losses.append(perItemCost)
                self.base_cost += c * weight
                sum_weight += weight
                self.accuracy += a
                self.cost_list.append(c)
                # Later bins reuse the variables created for the first one.
                tf.get_variable_scope().reuse_variables()

        # calculate the total loss
        self.base_cost /= sum_weight
        self.accuracy /= num_sizes

        self.sat_loss = tf.reduce_sum(
            tf.stack(saturation_loss)) * self.saturation_weight / sum_weight
        cost = self.base_cost + self.sat_loss

        # add gradient noise proportional to learning rate
        tvars = tf.trainable_variables()
        grads_0 = tf.gradients(cost, tvars)

        grads = []
        for grad in grads_0:
            if grad is None:
                # tf.gradients yields None for a variable that does not
                # influence the cost; there is nothing to perturb, so the
                # None is passed through instead of failing on `None + ...`.
                grads.append(None)
                continue
            grad1 = grad + tf.truncated_normal(
                tf.shape(grad)) * self.learning_rate * 1e-4
            grads.append(grad1)

        # optimizer
        optimizer = AdamaxOptimizer(self.learning_rate,
                                    beta1=0.9,
                                    beta2=1.0 - self.beta2_rate,
                                    epsilon=1e-8)
        self.optimizer = optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step)

        # some values for printout
        max_vals = [optimizer.get_slot(var, "m") for var in tvars]

        self.gnorm = tf.global_norm(max_vals)
        self.cost_list = tf.stack(self.cost_list)