Example #1
 def compute_gradients(self, loss, var_list=None, *args, **kwargs):
     if self._scale != 1.0:
         loss = tf.scalar_mul(self._scale, loss)
     gradvar = self._optimizer.compute_gradients(loss, var_list, *args,
                                                 **kwargs)
     # undo the loss scaling; skip variables that received no gradient
     gradvar = [(tf.scalar_mul(1. / self._scale, g), v)
                if g is not None else (None, v) for g, v in gradvar]
     return gradvar
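A minimal round-trip sketch of the scale-then-unscale pattern above (a sketch assuming a TF 1.x graph-mode environment; the scale value and variable are illustrative):

import tensorflow.compat.v1 as tf

scale = 128.0
w = tf.Variable(2.0)
scaled_loss = tf.scalar_mul(scale, w * w)
grads = tf.gradients(scaled_loss, [w])
# undo the scaling, exactly as compute_gradients above does
unscaled = [tf.scalar_mul(1.0 / scale, g) for g in grads]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(unscaled))  # [4.0] == d(w*w)/dw at w = 2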
Example #2
    def do_loss_initializations(self,
                                yloss_type="hinge_loss",
                                diversity_loss_type="dpp_style:inverse_dist",
                                feature_weights="inverse_mad"):
        """Defines the optimization loss"""

        # define the loss parts
        self.yloss_type = yloss_type
        self.diversity_loss_type = diversity_loss_type

        self.loss_weights = [
            self.yloss_type, self.diversity_loss_type, feature_weights
        ]

        # loss part 1: y-loss
        self.loss_part1 = self.compute_first_part_of_loss(self.yloss_type)

        # loss part 2: similarity between CFs and original instance
        if feature_weights == "inverse_mad":
            normalized_mads = self.data_interface.get_mads(normalized=True)
            feature_weights = {}
            for feature in normalized_mads:
                feature_weights[feature] = round(1 / normalized_mads[feature],
                                                 2)

        feature_weights_list = []
        for feature in self.data_interface.encoded_feature_names:
            if feature in feature_weights:
                feature_weights_list.append(feature_weights[feature])
            else:
                feature_weights_list.append(1.0)
        feature_weights_list = [feature_weights_list]

        self.feature_weights = tf.Variable(self.minx, dtype=tf.float32)
        self.dice_sess.run(
            tf.assign(self.feature_weights,
                      np.array(feature_weights_list, dtype=np.float32)))

        self.loss_part2 = self.compute_second_part_of_loss()

        # loss part 3: diversity between CFs
        if self.total_random_inits > 0:
            # random initialization method
            self.loss_part3 = tf.constant(0.0, dtype=tf.float32)
        else:
            self.loss_part3 = self.compute_third_part_of_loss(
                self.diversity_loss_type)

        # loss part 4: regularization term
        self.loss_part4 = self.compute_fourth_part_of_loss()

        # final loss:
        self.loss = tf.add(
            tf.subtract(
                tf.add(self.loss_part1,
                       tf.scalar_mul(self.weights[0], self.loss_part2)),
                tf.scalar_mul(self.weights[1], self.loss_part3)),
            tf.scalar_mul(self.weights[2], self.loss_part4))
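For readability, the nested add/subtract calls above compute the flat expression below (a sketch over the attributes defined in this method):

# loss = part1 + w0 * part2 - w1 * part3 + w2 * part4
self.loss = (self.loss_part1
             + self.weights[0] * self.loss_part2
             - self.weights[1] * self.loss_part3
             + self.weights[2] * self.loss_part4)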
Example #3
 def apply_gradients(self, gradvars, *args, **kwargs):
     v_list = [tf.norm(tensor=v, ord=2) for _, v in gradvars]
     g_list = [
         tf.norm(tensor=g, ord=2) if g is not None else 0.0
         for g, _ in gradvars
     ]
     v_norms = tf.stack(v_list)
     g_norms = tf.stack(g_list)
     zeds = tf.zeros_like(v_norms)
     # assign epsilon if weights or grads are 0, to avoid division by zero
     # and to prevent biases from getting stuck at their initialization (0.)
     cond = tf.logical_and(tf.not_equal(v_norms, zeds),
                           tf.not_equal(g_norms, zeds))
     true_vals = tf.scalar_mul(self._eta, tf.div(v_norms, g_norms))
     false_vals = tf.fill(tf.shape(v_norms), self._epsilon)
     larc_local_lr = tf.where(cond, true_vals, false_vals)
     if self._clip:
         ones = tf.ones_like(v_norms)
         lr = tf.fill(tf.shape(v_norms), self._learning_rate)
         # We need gradients to compute the local learning rate, so
         # compute_gradients of the wrapped optimizer (whose learning rate
         # is already fixed) has to be called first. We then scale the
         # gradients instead of the learning rate.
         larc_local_lr = tf.minimum(tf.div(larc_local_lr, lr), ones)
     gradvars = [(tf.multiply(larc_local_lr[i], g), v) if g is not None else
                 (None, v) for i, (g, v) in enumerate(gradvars)]
     return self._optimizer.apply_gradients(gradvars, *args, **kwargs)
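A numeric sketch of the LARC local learning rate computed above (the eta and epsilon values are illustrative, not taken from the class):

import tensorflow.compat.v1 as tf

eta, epsilon = 0.001, 1e-5
v_norms = tf.constant([3.0, 0.0])   # ||w|| per variable
g_norms = tf.constant([0.5, 0.0])   # ||g|| per variable
cond = tf.logical_and(tf.not_equal(v_norms, 0.0),
                      tf.not_equal(g_norms, 0.0))
larc_local_lr = tf.where(cond,
                         tf.scalar_mul(eta, tf.div(v_norms, g_norms)),
                         tf.fill(tf.shape(v_norms), epsilon))
# evaluates to [0.006, 1e-05]: zero-norm entries fall back to epsilon

Example #4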
    def build_train_op(self, lr_boundaries, lr_values, optimizer_type):
        train_step = tf.Variable(initial_value=0, trainable=False)
        self.train_step = train_step

        prob, logits = self.build_network(self.train_image_placeholder, True,
                                          False)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.train_label_placeholder, logits=logits)

        weighted_loss = tf.multiply(cross_entropy,
                                    self.train_weight_placeholder)
        cross_entropy_mean = tf.reduce_mean(weighted_loss,
                                            name='cross_entropy')

        # Accuracy Calculation
        prediction = tf.equal(tf.cast(tf.argmax(prob, axis=1), tf.int32),
                              self.train_label_placeholder)
        prediction = tf.cast(prediction, tf.float32)

        ########################
        # variance -> distance
        mean, variance = tf.nn.moments(prob, axes=[1])

        # distance = sign(prediction) * variance
        # sign function : y = 2*prediction - 1
        sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)
        distance = sign * tf.sqrt(variance)
        ########################

        self.train_accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
        self.learning_rate = tf.train.piecewise_constant(
            train_step, lr_boundaries, lr_values)

        # Optimizer Setting
        if optimizer_type == 'sgd':
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        elif optimizer_type == 'momentum':
            opt = tf.train.MomentumOptimizer(self.learning_rate,
                                             FLAGS.momentum,
                                             use_nesterov=FLAGS.nesterov)
        else:
            raise ValueError('unsupported optimizer_type: %s' % optimizer_type)

        weight = [i for i in tf.trainable_variables() if 'weight' in i.name]
        bias = [i for i in tf.trainable_variables() if 'bias' in i.name]
        beta = [i for i in tf.trainable_variables() if 'beta' in i.name]
        gamma = [i for i in tf.trainable_variables() if 'gamma' in i.name]

        assert len(weight) + len(bias) + len(beta) + len(gamma) == len(
            tf.trainable_variables())

        grads, total_loss, cross_entropy_loss = self.train_graph_model(
            opt, cross_entropy_mean)
        train_op = self.build_graph_train(opt, grads, optimizer_type,
                                          train_step)

        return cross_entropy_loss, self.train_accuracy, train_op, cross_entropy, prob, distance
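A numeric sketch of the sign/distance trick used above (values are illustrative): prediction in {0, 1} maps through 2*p - 1 to {+1, -1}, so the distance carries the prediction's correctness in its sign:

prediction = tf.constant([1.0, 0.0])                     # correct, incorrect
variance = tf.constant([0.04, 0.09])
sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)  # [ 1., -1.]
distance = sign * tf.sqrt(variance)                      # [ 0.2, -0.3]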
Example #5
    def __init__(self,
                 learning_rate,
                 num_layers,
                 size,
                 size_layer,
                 output_size,
                 forget_bias=0.1,
                 lambda_coeff=0.5):
        def lstm_cell(size_layer):
            return tf.nn.rnn_cell.GRUCell(size_layer)

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop = tf.nn.rnn_cell.DropoutWrapper(rnn_cells,
                                             output_keep_prob=forget_bias)
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, num_layers * size_layer))
        _, last_state = tf.nn.dynamic_rnn(drop,
                                          self.X,
                                          initial_state=self.hidden_layer,
                                          dtype=tf.float32)

        self.z_mean = tf.layers.dense(last_state, size)
        self.z_log_sigma = tf.layers.dense(last_state, size)

        epsilon = tf.random_normal(tf.shape(self.z_log_sigma))
        # reparameterization trick: z = mu + sigma * eps
        self.z_vector = self.z_mean + tf.exp(self.z_log_sigma) * epsilon

        with tf.variable_scope('decoder', reuse=False):
            rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell(size_layer) for _ in range(num_layers)],
                state_is_tuple=False)
            drop_dec = tf.nn.rnn_cell.DropoutWrapper(
                rnn_cells_dec, output_keep_prob=forget_bias)
            # note: the decoder below consumes self.X directly; x is unused
            x = tf.concat([tf.expand_dims(self.z_vector, axis=0), self.X],
                          axis=1)
            self.outputs, self.last_state = tf.nn.dynamic_rnn(
                drop_dec, self.X, initial_state=last_state, dtype=tf.float32)

        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.lambda_coeff = lambda_coeff

        self.kl_loss = -0.5 * tf.reduce_sum(
            1.0 + 2 * self.z_log_sigma - self.z_mean**2 -
            tf.exp(2 * self.z_log_sigma), 1)
        self.kl_loss = tf.scalar_mul(self.lambda_coeff, self.kl_loss)
        self.cost = tf.reduce_mean(
            tf.square(self.Y - self.logits) + self.kl_loss)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost)
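A self-contained sketch of the reparameterization trick behind z_vector above (shapes are illustrative): z = mu + exp(log_sigma) * eps with eps ~ N(0, I), so gradients flow through mu and log_sigma while the sampling noise stays external:

mu = tf.zeros([4, 8])
log_sigma = tf.zeros([4, 8])
eps = tf.random_normal(tf.shape(log_sigma))
z = mu + tf.exp(log_sigma) * eps  # elementwise; z ~ N(mu, exp(log_sigma)**2)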
Example #6
	def __init__(self, x, y=None, teacherLogits=None, lr=1e-04, nClasses=8, imgXdim=84, imgYdim=84, batchSize=64, keepProb=1.0, temperature=8, lambda_=0.5):
		self.x = x
		self.w = {}
		self.b = {}
		self.y = y 
		self.teacherLogits = teacherLogits
		self.lambda_ = lambda_ 
		self.T = temperature
		self.imgXdim = imgXdim
		self.imgYdim = imgYdim
		self.nClasses = nClasses
		self.batchSize = batchSize 
		self.learningRate = lr 
		self.dropout = keepProb
		self.fcOutSize = 48
		
		# Initialize parameters randomly and run
		self.initParameters()
		self.output, self.layerInfo = self.run() 
		
		if self.teacherLogits is not None: # For training
			# Define losses and optimizers & train the architecture with KD 
			self.outputTeacher = tf.scalar_mul(1.0 / self.T, self.teacherLogits)
			self.outputTeacher = tf.nn.softmax(self.outputTeacher)
			self.cost_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.output, labels=self.y))
			self.pred = tf.nn.softmax(self.output)
			self.output = tf.scalar_mul(1.0 / self.T, self.output)
			self.cost_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.output, labels=self.outputTeacher))
			self.cost = ((1.0 - lambda_) * self.cost_1 + lambda_ * self.cost_2)
			self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learningRate).minimize(self.cost)		
		else: # For standalone testing
			if self.y is not None:
				self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.output, labels=self.y))
			self.pred = tf.nn.softmax(self.output)

		if self.y is not None: # For labeled images
			# Evaluate model 
			self.correct_pred = tf.equal(tf.argmax(self.pred, 1), tf.argmax(self.y, 1))
			self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
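A standalone sketch of the temperature-softened targets used in the distillation branch above (TF 1.x; the logits and temperature are illustrative constants):

T = 8.0
teacher_logits = tf.constant([[2.0, 0.5, -1.0]])
student_logits = tf.constant([[1.5, 0.2, -0.8]])
soft_targets = tf.nn.softmax(tf.scalar_mul(1.0 / T, teacher_logits))
student_T = tf.scalar_mul(1.0 / T, student_logits)
kd_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=student_T,
                                            labels=soft_targets))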
Example #7
    def __init__(self,
                 state_size,
                 action_size,
                 gamma_reg=0.001,
                 minibatch_size=5,
                 **kwargs):
        """
		Parameters
		----------
		state_size, action_size : int
			Size of the environment state space and action space
		gamma_reg : float, optional
			LS-IELM regularisation parameter
		minibatch_size : int, optional
			Size of minibatches for updating
		**kwargs
			Additional keyword arguments passed to `SingleLayerNetwork`
		"""
        super().__init__(state_size, action_size, **kwargs)

        self.k = int(minibatch_size)
        self.prep_state = self.act
        self.H = tf.placeholder(shape=[self.k, self.N_hid], dtype=tf.float32)
        self.T = tf.placeholder(shape=[self.k, action_size], dtype=tf.float32)
        H_t = tf.transpose(self.H)
        A_inv = tf.Variable(tf.random_uniform([self.N_hid, self.N_hid], 0, 1))

        A0 = tf.add(tf.scalar_mul(1.0 / gamma_reg, tf.eye(self.N_hid)),
                    tf.matmul(H_t, self.H))
        A0_inv = tf.matrix_inverse(A0)
        W0 = tf.matmul(A0_inv, tf.matmul(H_t, self.T))
        self.initModel = (self.W.assign(W0), A_inv.assign(A0_inv))

        K1 = tf.add(tf.matmul(self.H, tf.matmul(A_inv, H_t)), tf.eye(self.k))
        K_t = tf.subtract(
            tf.eye(self.N_hid),
            tf.matmul(A_inv,
                      tf.matmul(H_t, tf.matmul(tf.matrix_inverse(K1),
                                               self.H))))
        W_new = tf.add(
            tf.matmul(K_t, self.W),
            tf.matmul(tf.matmul(K_t, A_inv), tf.matmul(H_t, self.T)))
        A_new = tf.matmul(K_t, A_inv)
        self.updateModel = (self.W.assign(W_new), A_inv.assign(A_new))

        self.first = True
        self.var_init()
Example #8
 def __init__(self):
     # placeholder
     self.sph_user = tf.sparse_placeholder(tf.int32, name='sph_user')
     self.sph_doc = tf.sparse_placeholder(tf.int32, name='sph_doc')
     self.sph_con = tf.sparse_placeholder(tf.int32, name='sph_con')
     self.ph_reward = tf.placeholder(tf.float32, name='ph_reward')
     self.ph_nq = tf.placeholder(
         tf.float32,
         shape=[pd['batch_size'], pd['rnn_max_len']],
         name='ph_nq')
     # main networks
     self.dst_embed, self.mq = self.build_net('main')
     # target networks
     _, self.tq = self.build_net('target')
     diff = tf.reshape(self.ph_reward, [-1]) + tf.scalar_mul(
         tf.constant(pd['gamma']), tf.reshape(
             self.ph_nq, [-1])) - tf.reshape(self.mq, [-1])
     self.loss = tf.reduce_mean(tf.square(diff))
     self.a_grads = tf.clip_by_global_norm(
         tf.gradients(self.mq, self.dst_embed), pd['grad_clip'])[0]
     vs = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                            scope='main/value')
     vs.extend(
         tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                           scope='main/feat_embedding'))
     self.grads = tf.clip_by_global_norm(tf.gradients(self.loss, vs),
                                         pd['grad_clip'])[0]
     with tf.variable_scope('train_value'):
         optimizer = tf.train.AdamOptimizer(pd['lr'])
         self.opt = optimizer.apply_gradients(zip(self.grads, vs))
     self.m_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope="main/value")
     self.m_params.extend(
         tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                           scope='main/feat_embedding'))
     self.t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope="target/value")
     self.t_params.extend(
         tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                           scope='target/feat_embedding'))
     alpha = pd['double_networks_sync_step']
     self.sync_op = [
         tf.assign(t, (1.0 - alpha) * t + alpha * m)
         for t, m in zip(self.t_params, self.m_params)
     ]
     self.total_loss, self.batch_counter = 0.0, 0
Example #9
    def build_test_op(self):
        prob, logits = self.build_network(self.test_image_placeholder, False,
                                          True)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.test_label_placeholder, logits=logits)

        prediction = tf.equal(tf.cast(tf.argmax(prob, axis=1), tf.int32),
                              self.test_label_placeholder)
        prediction = tf.cast(prediction, tf.float32)

        self.test_loss = tf.reduce_mean(loss)
        self.test_accuracy = tf.reduce_mean(prediction)

        # variance -> distance
        mean, variance = tf.nn.moments(prob, axes=[1])

        # distance = sign(prediction) * variance
        # sign function : y = 2*prediction - 1
        sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)
        distance = sign * tf.sqrt(variance)

        return self.test_loss, self.test_accuracy, loss, prob
Example #10
    def build_train_op(self, lr_boundaries, lr_values, optimizer_type):
        train_step = tf.Variable(initial_value=0, trainable=False)

        self.train_step = train_step

        prob, logits = self.build_network(self.train_image_placeholder, True, False)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.train_label_placeholder,
            logits=logits
        )

        prediction = tf.equal(tf.cast(tf.argmax(prob, axis=1), tf.int32), self.train_label_placeholder)
        prediction = tf.cast(prediction, tf.float32)

        # variance -> distance
        mean, variance = tf.nn.moments(prob, axes=[1])
        # distance = sign(prediction) * variance
        # sign function : y = 2*prediction - 1
        sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)
        distance = sign * tf.sqrt(variance)
        
        l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])

        weighted_loss = tf.multiply(loss, self.train_weight_placeholder)
        self.train_loss = tf.reduce_mean(weighted_loss) + l2_loss * weight_decay

        self.train_accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
        self.learning_rate = tf.train.piecewise_constant(train_step, lr_boundaries, lr_values)

        if optimizer_type == "momentum":
            optimizer = tf.train.MomentumOptimizer(self.learning_rate, 0.9, use_nesterov=True)
        elif optimizer_type == "sgd":
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(self.train_loss, global_step=train_step)
        
        return self.train_loss, self.train_accuracy, train_op, loss, prob, distance
Example #11
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
                                         check_inf_nan):
    """Calculate the average gradient for a shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
      (gradient, variable) pair within the outer list represents the gradient
      of the variable calculated for a single tower, and the number of pairs
      equals the number of towers.
    use_mean: if True, mean is taken, else sum of gradients is taken.
    check_inf_nan: check grads for nans and infs.

  Returns:
    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
      gradient has been averaged across all towers. The variable is chosen from
      the first tower. has_nan_or_inf indicates whether any gradient is NaN or Inf.
  """
    grads = [g for g, _ in grad_and_vars]
    if any(isinstance(g, tf.IndexedSlices) for g in grads):
        # TODO(reedwm): All-reduce IndexedSlices more effectively.
        grad = aggregate_indexed_slices_gradients(grads)
    else:
        grad = tf.add_n(grads)

    if use_mean and len(grads) > 1:
        grad = tf.scalar_mul(1.0 / len(grads), grad)

    v = grad_and_vars[0][1]
    if check_inf_nan:
        with tf.name_scope('check_for_inf_and_nan'):
            has_nan_or_inf = tf.logical_not(tf.reduce_all(tf.is_finite(grads)))
        return (grad, v), has_nan_or_inf
    else:
        return (grad, v), None
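A minimal sketch of averaging one variable's gradients across two towers with the function above (TF 1.x; the gradient values are illustrative):

v = tf.Variable([1.0, 2.0])
tower_grads = [(tf.constant([0.2, 0.4]), v),
               (tf.constant([0.4, 0.8]), v)]
(avg_grad, var), _ = aggregate_single_gradient_using_copy(
    tower_grads, use_mean=True, check_inf_nan=False)
# avg_grad evaluates to [0.3, 0.6]

Example #12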
import tensorflow.compat.v1 as tf
# Create 2-D tensors to serve as operands for add, subtract, multiply and divide
t1 = tf.constant([[0, 1, 2], [3, 4, 5]], tf.float32)
# Create a tensor of the same type as t1
# t2 = tf.constant([[5, 3, 1]], tf.float32)    # same number of columns as t1: broadcast across rows
t2 = tf.constant([[1], [2]], tf.float32)  # same number of rows as t1: broadcast across columns
session = tf.Session()
# Add the two 2-D tensors
result_add = tf.add(t1, t2)  # equivalent to result_add = t1 + t2
# Subtract the two 2-D tensors
result_subtract = tf.subtract(t1, t2)  # equivalent to result_subtract = t1 - t2
# Multiply the two 2-D tensors elementwise
result_multiply = tf.multiply(t1, t2)  # equivalent to result_multiply = t1 * t2
# Multiply a scalar with a tensor
result_scalar_mul = tf.scalar_mul(2, t1)  # equivalent to result_scalar_mul = 2 * t1
# Divide the two 2-D tensors elementwise
result_div = tf.div(t1, t2)  # equivalent to result_div = t1 / t2
# Print the results
print("2-D tensor t1:\n", session.run(t1))
print("2-D tensor t2:\n", session.run(t2))
print("Addition result result_add:\n", session.run(result_add))
print("Subtraction result result_subtract:\n", session.run(result_subtract))
print("Multiplication result result_multiply:\n", session.run(result_multiply))
print("Result of multiplying scalar 2 by tensor t1, result_scalar_mul:\n", session.run(result_scalar_mul))
print("Division result result_div:\n", session.run(result_div))
session.close()
    def _static_subsample(self, indicator, batch_size, labels):
        """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
        N should be a compile-time constant.
      batch_size: desired batch size. This scalar cannot be None.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
        (=False) examples. N should be a compile-time constant.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled. It ensures the length of output of the subsample is always
        batch_size, even when number of examples set to True in indicator is
        less than batch_size.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
        # Check if indicator and labels have a static size.
        if not indicator.shape.is_fully_defined():
            raise ValueError(
                'indicator must be static in shape when is_static is'
                ' True')
        if not labels.shape.is_fully_defined():
            raise ValueError('labels must be static in shape when is_static is'
                             ' True')
        if not isinstance(batch_size, int):
            raise ValueError(
                'batch_size has to be an integer when is_static is'
                ' True.')

        input_length = tf.shape(indicator)[0]

        # Set the number of examples set True in indicator to be at least
        # batch_size.
        num_true_sampled = tf.reduce_sum(tf.cast(indicator, tf.float32))
        additional_false_sample = tf.less_equal(
            tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
            batch_size - num_true_sampled)
        indicator = tf.logical_or(indicator, additional_false_sample)

        # Shuffle indicator and label. Need to store the permutation to restore the
        # order post sampling.
        permutation = tf.random_shuffle(tf.range(input_length))
        indicator = ops.matmul_gather_on_zeroth_axis(
            tf.cast(indicator, tf.float32), permutation)
        labels = ops.matmul_gather_on_zeroth_axis(tf.cast(labels, tf.float32),
                                                  permutation)

        # index (starting from 1) when indicator is True, 0 when False
        indicator_idx = tf.where(tf.cast(indicator, tf.bool),
                                 tf.range(1, input_length + 1),
                                 tf.zeros(input_length, tf.int32))

        # Replace -1 for negative, +1 for positive labels
        signed_label = tf.where(
            tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
            tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
        # negative of index for negative label, positive index for positive label,
        # 0 when indicator is False.
        signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
        sorted_signed_indicator_idx = tf.nn.top_k(signed_indicator_idx,
                                                  input_length,
                                                  sorted=True).values

        [num_positive_samples, num_negative_samples
         ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx,
                                           batch_size)

        sampled_idx = self._get_values_from_start_and_end(
            sorted_signed_indicator_idx, num_positive_samples,
            num_negative_samples, batch_size)

        # Shift the indices to start from 0 and remove any samples that are set as
        # False.
        sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
        sampled_idx = tf.multiply(
            tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
            sampled_idx)

        sampled_idx_indicator = tf.cast(
            tf.reduce_sum(tf.one_hot(sampled_idx, depth=input_length), axis=0),
            tf.bool)

        # project back the order based on stored permutations
        reprojections = tf.one_hot(permutation,
                                   depth=input_length,
                                   dtype=tf.float32)
        return tf.cast(
            tf.tensordot(tf.cast(sampled_idx_indicator, tf.float32),
                         reprojections,
                         axes=[0, 0]), tf.bool)
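Example #14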
  def last_value_quantize(self,
                          inputs,
                          per_channel=False,
                          init_min=-6.0,
                          init_max=6.0,
                          name_prefix='FixedValueQuant',
                          reuse=None,
                          is_training=False,
                          num_bits=8,
                          narrow_range=False,
                          relative_quantile=0,
                          freeze=False,
                          quant_delay=False):
    """Adds a layer that collects quantization ranges as last input ranges.

    LastValueQuantize creates variables called 'min' and 'max', representing the
    interval used for quantization and clamping.

    Args:
      inputs: a tensor containing values to be quantized.
      per_channel: (Optional) a boolean specifying whether to use different
        quantization ranges per output channel.
      init_min: a float scalar, the initial value for variable min.
      init_max: a float scalar, the initial value for variable max.
      name_prefix: name_prefix for created nodes.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      is_training: Whether the op is applied to a training or eval graph.
      num_bits: Number of bits to use for quantization, must be between 2 and 8.
      narrow_range: Whether to use the narrow quantization range
        [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
      relative_quantile: Specify the location of quantization min and max
        parameters. relative_quantile = 0 is equivalent to using min and max
        of input; relative_quantile = 1 set min and max the optimal location
        assuming the input distribution is uniform. In reality, a good value
        should be in the range [0 1].
      freeze: If True, the min and max variables are calculated once at the
        beginning of training and then frozen. This is used for quantized
        fine-tuning of a pretrained checkpoint. If False, the min and max are
        calculated and updated every cycle.
      quant_delay: The number of global steps after which the fake quantization
        is turned on. Used for performing a fine-tuning experiment without
        starting from a pre-trained checkpoint.
    Returns:
      a tensor containing quantized values.
    """

    with tf.variable_scope(
        None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope:
      scope.set_partitioner(None)
      input_shape = inputs.get_shape()
      input_dim = len(input_shape)
      if per_channel:
        # Only support quantizing 1-, 2- and 4-dimensional tensors.
        assert input_dim in [1, 2, 4]
        min_max_shape = [input_shape[-1]]
      else:
        min_max_shape = []

      min_var = tf.get_variable('min',
                                min_max_shape,
                                tf.float32,
                                initializer=tf.constant_initializer(init_min),
                                trainable=False)
      max_var = tf.get_variable('max',
                                min_max_shape,
                                tf.float32,
                                initializer=tf.constant_initializer(init_max),
                                trainable=False)
      if not is_training:
        return self.delayed_quant(
            inputs,
            min_var,
            max_var,
            per_channel=per_channel,
            num_bits=num_bits,
            narrow_range=narrow_range,
            quant_delay=None)

      if per_channel:
        if input_dim == 2:
          reduce_dims = [0]
        elif input_dim == 4:
          reduce_dims = [0, 1, 2]

      if num_bits >= 4:
        quantile = 0
      else:
        quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100

      if per_channel:
        if input_dim >= 2:
          batch_min = tfp.stats.percentile(
              inputs, q=quantile, axis=reduce_dims, name='BatchMin')
        else:
          batch_min = inputs
      else:
        batch_min = tfp.stats.percentile(
            inputs, q=quantile, name='BatchMin')

      if per_channel:
        if input_dim >= 2:
          batch_max = tfp.stats.percentile(
              inputs, q=100 - quantile, axis=reduce_dims, name='BatchMax')
        else:
          batch_max = inputs
      else:
        batch_max = tfp.stats.percentile(
            inputs, q=100 - quantile, name='BatchMax')

      batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max))

      if narrow_range:
        batch_adjusted_min = 0 - batch_abs_max
      else:
        multiplier = 1.0 + 1.0 / (2.0**(num_bits - 1.0) - 1.0)
        batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max)

      batch_abs_max = tf.cast(batch_abs_max, tf.float32)
      batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32)

      if freeze:
        def make_var_op(var):
          def f():
            return var
          return f

        quant_step = common.CreateOrGetQuantizationStep()
        min_max_assign = tf.less_equal(
            quant_step, 1, name='MinMaxAssign')
        min_value = tf.cond(min_max_assign,
                            make_var_op(batch_adjusted_min),
                            make_var_op(min_var),
                            name='AssignMinCond')
        max_value = tf.cond(min_max_assign,
                            make_var_op(batch_abs_max),
                            make_var_op(max_var),
                            name='AssignMaxCond')
      else:
        min_value = batch_adjusted_min
        max_value = batch_abs_max

      assign_min = tf.assign(min_var, min_value)
      assign_max = tf.assign(max_var, max_value)

      return self.delayed_quant(
          inputs,
          assign_min,
          assign_max,
          per_channel=per_channel,
          num_bits=num_bits,
          narrow_range=narrow_range,
          quant_delay=quant_delay)
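A plain-Python sketch of the symmetric range adjustment above for num_bits=8 and narrow_range=False (the batch_abs_max value is illustrative):

multiplier = 1.0 + 1.0 / (2.0**(8 - 1.0) - 1.0)      # ~1.007874
batch_abs_max = 6.0
batch_adjusted_min = 0 - multiplier * batch_abs_max  # ~-6.047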
Example #15
def train(model_path, learning_rate, epoch, noisy=False):
    total_epoch = epoch
    teacher = nin()
    student = lenet()
    if noisy:
        drop_scale = 1 / Nratio
        noisy_mask = tf.nn.dropout(tf.constant(
            np.float32(np.ones((batch_size, 1))) / drop_scale),
                                   keep_prob=Nratio)  # (batchsize, 1)
        gaussian = tf.random_normal(shape=[batch_size, 1],
                                    mean=0.0,
                                    stddev=Nsigma)
        noisy = tf.multiply(noisy_mask, gaussian)
        #noisy_add = tf.add(tf.constant(np.float32(np.ones((batch_size,1)))), noisy)
        teacher = tf.multiply(teacher,
                              tf.tile(noisy, tf.constant([1, 10])))  # (batchsize, 10)
        #teacher = tf.add(teacher, tf.tile(noisy,tf.constant([1,10])))
        print(bcolors.G + "prepare for training, noisy mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size
    elif KD == True:  # correct Hinton method at 2017.1.3
        print(bcolors.G + "prepare for training, knowledge distilling mode" +
              bcolors.END)
        one_hot = tf.one_hot(y, n_classes, 1.0, 0.0)
        #one_hot = tf.cast(one_hot_int, tf.float32)
        teacher_tau = tf.scalar_mul(1.0 / tau, teacher)
        student_tau = tf.scalar_mul(1.0 / tau, student)
        objective1 = tf.nn.sigmoid_cross_entropy_with_logits(
            student_tau, one_hot)
        objective2 = tf.scalar_mul(0.5, tf.square(student_tau - teacher_tau))
        tf_loss = (lamda * tf.reduce_sum(objective1) +
                   (1 - lamda) * tf.reduce_sum(objective2)) / batch_size
    else:
        print(bcolors.G + "prepare for training, NIPS2014 mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size

    optimizer1 = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(tf_loss)
    optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate /
                                        10).minimize(tf_loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options, allow_soft_placement=True))
    tf.global_variables_initializer().run()
    with tf.device('/cpu:0'):
        saver = tf.train.Saver(max_to_keep=100)
        #saver.restore(sess, os.path.join(model_path, 'model-99'))
    data, label = read_cifar10('train')
    index = np.array(range(len(data)))  # sample indices, shuffled each epoch
    mean = cal_mean()
    begin = time.time()
    iterations = len(data) // batch_size
    decay_step = int(total_epoch * 0.8)
    cnt = 0
    dropout_rate = dropout
    print(bcolors.G + "number of iterations (per epoch) =" +
          str(len(data) / batch_size) + bcolors.END)
    for i in range(total_epoch):
        np.random.shuffle(index)
        cost_sum = 0
        for j in range(iterations):
            batch_x = np.float32(
                data[index[j * batch_size:(j + 1) * batch_size]]) - mean
            batch_y = np.squeeze(
                np.float32(label[index[j * batch_size:(j + 1) * batch_size]]))
            if cnt // decay_step == 0:
                lr = learning_rate
                _, cost = sess.run([optimizer1, tf_loss],
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y,
                                       keep_prob: 1 - dropout_rate
                                   })
            elif cnt // decay_step == 1:
                lr = learning_rate / 10
                _, cost = sess.run([optimizer2, tf_loss],
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y,
                                       keep_prob: 1 - dropout_rate
                                   })
            cost_sum += cost
            #pdb.set_trace()
            #if (j % int(iterations*0.25) == 0):
            #    print(("epoch %d-iter %d, cost = %f , avg-cost = %f"%(i, j, cost, cost/n_classes))
            #    sys.stdout.flush()
        cnt += 1
        avg_time = time.time() - begin
        print(
            "epoch %d - avg. %f seconds in each epoch, lr = %.0e, cost = %f , avg-cost-per-logits = %f"
            % (i, avg_time / cnt, lr, cost_sum,
               cost_sum / iterations / n_classes))
        if np.mod(i + 1, 10) == 0:
            print("Epoch ", i + 1, " is done. Saving the model ...")
            with tf.device('/cpu:0'):
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=i)
        sys.stdout.flush()
Example #16
def main():
    """
    Create the model and start the training
    """

    # Get the CL arguments
    args = get_arguments()

    # Check if the network architecture is valid
    if args.arch not in VALID_ARCHS:
        raise ValueError("Network architecture %s is not supported!"%(args.arch))

    # Check if the method to compute importance is valid
    if args.imp_method not in MODELS:
        raise ValueError("Importance measure %s is undefined!"%(args.imp_method))
    
    # Check if the optimizer is valid
    if args.optim not in VALID_OPTIMS:
        raise ValueError("Optimizer %s is undefined!"%(args.optim))

    # Create log directories to store the results
    if not os.path.exists(args.log_dir):
        print('Log directory %s created!'%(args.log_dir))
        os.makedirs(args.log_dir)

    # Generate the experiment key and store the meta data in a file
    exper_meta_data = {'DATASET': 'PERMUTE_MNIST',
            'NUM_RUNS': args.num_runs,
            'TRAIN_SINGLE_EPOCH': args.train_single_epoch, 
            'IMP_METHOD': args.imp_method, 
            'SYNAP_STGTH': args.synap_stgth,
            'FISHER_EMA_DECAY': args.fisher_ema_decay,
            'FISHER_UPDATE_AFTER': args.fisher_update_after,
            'OPTIM': args.optim, 
            'LR': args.learning_rate, 
            'BATCH_SIZE': args.batch_size, 
            'MEM_SIZE': args.mem_size}
    experiment_id = "PERMUTE_MNIST_HERDING_%s_%s_%s_%s_%r_%s-"%(args.arch, args.train_single_epoch, args.imp_method, str(args.synap_stgth).replace('.', '_'), 
            str(args.batch_size), str(args.mem_size)) + datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
    snapshot_experiment_meta_data(args.log_dir, experiment_id, exper_meta_data)

    # Get the subset of data depending on training or cross-validation mode
    if args.online_cross_val:
        num_tasks = K_FOR_CROSS_VAL
    else:
        num_tasks = NUM_TASKS - K_FOR_CROSS_VAL

    # Variables to store the accuracies and standard deviations of the experiment
    acc_mean = dict()
    acc_std = dict()

    # Reset the default graph
    ops.reset_default_graph()
    graph = tf.Graph()
    with graph.as_default():

        # Set the random seed
        tf.set_random_seed(args.random_seed)

        # Define Input and Output of the model
        x = tf.placeholder(tf.float32, shape=[None, INPUT_FEATURE_SIZE])
        #x = tf.placeholder(tf.float32, shape=[None, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS])
        if args.imp_method == 'PNN':
            y_ = []
            for i in range(num_tasks):
                y_.append(tf.placeholder(tf.float32, shape=[None, TOTAL_CLASSES]))
        else:
            y_ = tf.placeholder(tf.float32, shape=[None, TOTAL_CLASSES])

        # Define the optimizer
        if args.optim == 'ADAM':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)

        elif args.optim == 'SGD':
            opt = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate)

        elif args.optim == 'MOMENTUM':
            base_lr = tf.constant(args.learning_rate)
            learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - train_step / training_iters), OPT_POWER))
            opt = tf.train.MomentumOptimizer(args.learning_rate, OPT_MOMENTUM)

        # Create the model / construct the graph
        model = Model(x, y_, num_tasks, opt, args.imp_method, args.synap_stgth, args.fisher_update_after, 
                args.fisher_ema_decay, network_arch=args.arch)

        # Set up tf session and initialize variables.
        if USE_GPU:
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
        else:
            config = tf.ConfigProto(
                    device_count = {'GPU': 0}
                    )

        time_start = time.time()
        with tf.Session(config=config, graph=graph) as sess:
            runs = train_task_sequence(model, sess, args)
            # Close the session
            sess.close()
        time_end = time.time()
        time_spent = time_end - time_start

    # Store all the results in one dictionary to process later
    exper_acc = dict(mean=runs)

    # If cross-validation flag is enabled, store the stuff in a text file
    if args.cross_validate_mode:
        acc_mean = runs.mean(0)
        acc_std = runs.std(0)
        cross_validate_dump_file = args.log_dir + '/' + 'PERMUTE_MNIST_%s_%s'%(args.imp_method, args.optim) + '.txt'
        with open(cross_validate_dump_file, 'a') as f:
            if MULTI_TASK:
                f.write('GPU:{} \t ARCH: {} \t LR:{} \t LAMBDA: {} \t ACC: {}\n'.format(USE_GPU, args.arch, args.learning_rate, 
                    args.synap_stgth, acc_mean[-1, :].mean()))
            else:
                f.write('GPU: {} \t ARCH: {} \t LR:{} \t LAMBDA: {} \t ACC: {} \t Fgt: {} \t Time: {}\n'.format(USE_GPU, args.arch, args.learning_rate, 
                    args.synap_stgth, acc_mean[-1, :].mean(), compute_fgt(acc_mean), str(time_spent)))

    # Store the experiment output to a file
    snapshot_experiment_eval(args.log_dir, experiment_id, exper_acc)
Example #17
    plt.scatter(data['x'][:, 0], data['y'])
    plt.xlabel('Date')
    plt.ylabel('Number of newly infected')

    X = tf.placeholder(name='X', shape=(None, nb_features), dtype=tf.float32)
    Y = tf.placeholder(name='Y', shape=(None), dtype=tf.float32)
    w = tf.Variable(tf.zeros(nb_features), name='W')
    bias = tf.Variable(0.0)

    w_col = tf.reshape(w, (nb_features, 1), name='W_col')
    hyp = tf.add(tf.matmul(X, w_col), bias, name='Hyp')

    Y_col = tf.reshape(Y, (-1, 1), name='Y_col')

    l2_reg = tf.scalar_mul(lmbd, tf.reduce_mean(tf.square(w)), name='L2_reg')

    mse = tf.reduce_mean(tf.square(hyp - Y_col), name='Mse')
    loss = tf.add(mse, l2_reg, name='loss')

    opt_op = tf.train.AdamOptimizer(name="opt_op").minimize(loss)

    with tf.Session() as sess:
        writer = tf.summary.FileWriter('./graphs', graph=sess.graph)
        sess.run(tf.global_variables_initializer())

        # Run 100 training epochs.
        nb_epochs = 100
        for epoch in range(nb_epochs):

            # Stochastic Gradient Descent.
Example #18
    def _define_desc_graph(self):
        with tf.variable_scope('desc'):
            self.desc1 = AM_desc1_batch = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.default_desc_length, self.wv_dim],
                name='desc1')

            self.desc2 = AM_desc2_batch = tf.placeholder(
                dtype=tf.float32,
                shape=[None, self.default_desc_length, self.wv_dim],
                name='desc2')

            gru_1 = tf.keras.layers.GRU(units=self.wv_dim,
                                        return_sequences=True)

            gru_5 = tf.keras.layers.GRU(units=self.wv_dim,
                                        return_sequences=True)

            conv1 = tf.keras.layers.Conv1D(filters=self.wv_dim,
                                           kernel_size=3,
                                           strides=1,
                                           activation=tf.tanh,
                                           padding='valid',
                                           use_bias=True)

            ds3 = tf.keras.layers.Dense(units=self.wv_dim,
                                        activation=tf.tanh,
                                        use_bias=True)

            self._att1 = att1 = tf.keras.layers.Dense(units=1,
                                                      activation='tanh',
                                                      use_bias=True)
            self._att3 = att3 = tf.keras.layers.Dense(units=1,
                                                      activation='tanh',
                                                      use_bias=True)

            # gru_1 + att1
            mp1_b = conv1(gru_1(AM_desc1_batch))
            mp2_b = conv1(gru_1(AM_desc2_batch))

            att1_w = tf.keras.activations.softmax(att1(mp1_b), axis=-2)
            att2_w = tf.keras.activations.softmax(att1(mp2_b), axis=-2)

            size1 = self.default_desc_length

            mp1_b = tf.multiply(mp1_b, tf.scalar_mul(size1, att1_w))
            mp2_b = tf.multiply(mp2_b, tf.scalar_mul(size1, att2_w))

            # gru_5 + att3
            mp1_b = gru_5(mp1_b)
            mp2_b = gru_5(mp2_b)

            att1_w = tf.keras.activations.softmax(att3(mp1_b), axis=-2)
            att2_w = tf.keras.activations.softmax(att3(mp2_b), axis=-2)

            mp1_b = tf.multiply(mp1_b, att1_w)
            mp2_b = tf.multiply(mp2_b, att2_w)

            # last ds
            ds1_b = tf.reduce_sum(mp1_b, 1)
            ds2_b = tf.reduce_sum(mp2_b, 1)
            eb_desc_batch1 = tf.nn.l2_normalize(ds3(ds1_b), dim=1)
            eb_desc_batch2 = tf.nn.l2_normalize(
                ds3(ds2_b), dim=1)  # tf.nn.l2_normalize(DS4(ds2_b), dim=1)

            indicator = np.empty((self.desc_batch_size, self.desc_batch_size),
                                 dtype=np.float32)
            indicator.fill(self.negative_indication_weight)
            np.fill_diagonal(indicator, 1.)
            indicator = tf.constant(indicator)

            self.desc_loss = -tf.reduce_sum(
                tf.log(
                    tf.sigmoid(
                        tf.multiply(
                            tf.matmul(eb_desc_batch1,
                                      tf.transpose(eb_desc_batch2)),
                            indicator)) + 0.)) / self.desc_batch_size

            self.desc_embedding1 = eb_desc_batch1
            self.desc_embedding2 = eb_desc_batch2

            # opt_vars = [v for v in tf.trainable_variables() if v.name.startswith("desc")]
            self.desc_optimizer = get_optimizer(
                self.args.optimizer,
                self.args.learning_rate).minimize(self.desc_loss)
Example #19
File: main.py Project: piccaSun/Research
def main(_):

    # Configure checkpoint/samples dir
    tl.files.exists_or_mkdir(a.checkpoint_dir)
    tl.files.exists_or_mkdir(a.sample_dir)

    #read gaussian

    CLIP = [-0.01, 0.01]
    CRITIC_NUM = 5

    data_files = os.listdir("./gaussian_dataset")
    num_files = len(data_files)
    for i in range(num_files):
        data_files[i] = int(data_files[i].split('.')[0].split('_')[2])
    # print(data_files)

    data_files.sort()

    #print(data_files)

    for i in range(num_files):
        data_files[i] = "./gaussian_dataset/gaussianheavy_blackaverage_" + str(
            data_files[i]).zfill(4) + ".png"

    images = []

    for file in data_files:
        image = get_image(file,
                          a.image_size,
                          is_crop=a.is_crop,
                          resize_w=a.output_size,
                          is_grayscale=False)

        #bark36-color channel=3
        image = image[:, :, np.newaxis]

        #print(image.shape)
        #time.sleep(5)
        images.append(image)

    # Construct graph on GPU
    with tf.device("/gpu:0"):

        #Define Models #
        ################################################################################################

        x_l = tf.placeholder(tf.float32, [None, 1], name='x_noise')
        y_l = tf.placeholder(tf.float32, [None, 1], name='y_noise')
        z_l = tf.placeholder(tf.float32, [None, 1], name='z_noise')
        #z = [tf.cos(theta),tf.sin(theta)]
        # x_l = 10*tf.sin(phi)*tf.cos(theta)
        # y_l = 10*tf.sin(phi)*tf.sin(theta)
        # z_l = 10*tf.cos(phi)

        z = tf.concat([x_l, y_l, z_l], axis=1)

        real_images = tf.placeholder(
            tf.float32, [None, a.output_size, a.output_size, a.c_dim],
            name='real_images')

        sess = tf.InteractiveSession()

        # Input noise into generator for training (reuse=False)
        net_g = generator(z, is_train=True, reuse=False)
        #net_g = generator(z , is_train=True, reuse=True)

        # Input real and generated fake images into discriminator for training
        net_d1, d1g_logits1 = discriminator1(net_g.outputs,
                                             is_train=True,
                                             reuse=False)
        #net_d, d_logits = discriminator(net_g.outputs, is_train=True, reuse=True)
        _, d1x_logits1 = discriminator1(real_images, is_train=True, reuse=True)

        # Input noise into generator for evaluation;
        # set is_train to False so that BatchNormLayer behaves differently
        net_g2 = generator(z, is_train=False, reuse=True)

        #Define Training Operations #
        # discriminator: real images are labelled as 1 (tf.ones_like() would make every target entry 1)
        #d1_loss_real = tl.cost.sigmoid_cross_entropy(d1x_logits1, tf.ones_like(d1x_logits1), name='d1real')
        d1_loss_real = tf.reduce_mean(
            tf.scalar_mul(-1, d1x_logits1, name='d1real'))
        # discriminator: images from generator (fake) are labelled as 0
        #d1_loss_fake = tl.cost.sigmoid_cross_entropy(d1g_logits1, tf.zeros_like(d1g_logits1), name='d1fake')

        d1_loss_fake = tf.reduce_mean(d1g_logits1, name='d1fake')
        # cost for updating discriminator
        d1_loss = 0.5 * (d1_loss_real + d1_loss_fake)

        #d2
        # Input real and generated fake images into discriminator for training
        #net_d2, d2g_logits2 = discriminator2(net_g.outputs, is_train=True, reuse=False)
        net_d2, d2g_logits2 = discriminator2(real_images,
                                             is_train=True,
                                             reuse=False)
        #net_d, d_logits = discriminator(net_g.outputs, is_train=True, reuse=True)
        #_, d2x_logits2 = discriminator2(real_images, is_train=True, reuse=True)
        _, d2x_logits2 = discriminator2(net_g.outputs,
                                        is_train=True,
                                        reuse=True)

        #with tf.name_scope("d2_loss_real"):
        #d2_loss_real = tl.cost.sigmoid_cross_entropy(d2x_logits2, tf.zeros_like(d2x_logits2), name='d2real')
        d2_loss_real = tf.reduce_mean(
            tf.scalar_mul(-1, d2x_logits2, name='d2real'))
        # discriminator: images from generator (fake) are labelled as 0
        #with tf.name_scope("d2_loss_real"):
        #d2_loss_fake = tl.cost.sigmoid_cross_entropy(d2g_logits2, tf.ones_like(d2g_logits2), name='d2fake')
        d2_loss_fake = tf.reduce_mean(d2g_logits2, name='d2fake')
        # cost for updating discriminator
        #with tf.name_scope("d2_loss"):
        d2_loss = 0.5 * (d2_loss_real + d2_loss_fake)

        #with tf.name_scope("d_loss"):
        d_loss = d1_loss + d2_loss

        h4_params = tl.layers.get_variables_with_name(
            name='discriminator/d/h4/lin_sigmoid', train_only=True)
        h5_params = tl.layers.get_variables_with_name(
            name='discriminator/d/h5/lin_sigmoid', train_only=True)
        l2_params = h4_params + h5_params
        l2_wl = 0.0002

        for p in l2_params:
            weight_loss = tf.multiply(tf.nn.l2_loss(p), l2_wl)
            d_loss += weight_loss

        # generator: try to make the fake images look real (1)
        #g1_loss = tl.cost.sigmoid_cross_entropy(d1g_logits1, tf.ones_like(d1g_logits1), name='g1fake')
        #g2_loss = tl.cost.sigmoid_cross_entropy(d2g_logits2, tf.zeros_like(d2g_logits2), name='g2fake')
        g1_loss = tf.reduce_mean(tf.scalar_mul(-1, d1g_logits1), name='g1fake')
        g2_loss = tf.reduce_mean(tf.scalar_mul(-1, d2g_logits2), name='g2fake')
        g_loss = 0.5 * (g1_loss + g2_loss)

        g_vars = tl.layers.get_variables_with_name('generator', True, True)
        d_vars = tl.layers.get_variables_with_name('discriminator', True, True)

        # Define optimizers for updating discriminator and generator
        # d_optim = tf.train.AdamOptimizer(a.learning_rate, beta1=a.beta1) \
        #                   .minimize(d_loss, var_list=d_vars)
        # g_optim = tf.train.AdamOptimizer(a.learning_rate, beta1=a.beta1) \
        #                   .minimize(g_loss, var_list=g_vars)
        d_optim = tf.train.RMSPropOptimizer(a.learning_rate)\
                          .minimize(d_loss, var_list=d_vars)
        g_optim = tf.train.RMSPropOptimizer(a.learning_rate) \
                          .minimize(g_loss, var_list=g_vars)

        clip_d_op = [
            var.assign(tf.clip_by_value(var, CLIP[0], CLIP[1]))
            for var in d_vars
        ]
    # Init Session
    #sess = tf.InteractiveSession()

    f = pd.read_csv('./plant6.csv')
    f.columns = ["COL1", "COL2", "COL3"]

    x_label = f[["COL1"]]
    x_label = np.array(x_label)

    y_label = f[["COL2"]]
    y_label = np.array(y_label)

    z_label = f[["COL3"]]
    z_label = np.array(z_label)

    index2 = np.arange(0, 72, 1)
    # for i in range(num_files):
    #     x_label[i] = x_label
    #     y_label[i] = y_label
    #     z_label[i] = z_label
    images = np.asarray(images)
    sample_x_label = x_label[index2]
    sample_y_label = y_label[index2]
    sample_z_label = z_label[index2]
    sample_image = images[index2]
    batch_x_label = x_label[index2]
    batch_y_label = y_label[index2]
    batch_z_label = z_label[index2]
    batch_images = images[index2]

    with tf.name_scope('summary'):

        tf.summary.scalar('d1_loss', d1_loss)
        tf.summary.scalar('d2_loss', d2_loss)
        tf.summary.scalar('d_loss', d_loss)
        tf.summary.scalar('g1_loss', g1_loss)
        tf.summary.scalar('g2_loss', g2_loss)
        tf.summary.scalar('g_loss', g_loss)
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./logs', sess.graph)

    sess.run(tf.global_variables_initializer())

    model_dir = "%s_%s_%s" % (a.dataset, a.batch_size, a.output_size)
    save_dir = os.path.join(a.checkpoint_dir, model_dir)
    tl.files.exists_or_mkdir(a.sample_dir)
    tl.files.exists_or_mkdir(save_dir)

    # load the latest checkpoints
    net_g_name = os.path.join(save_dir, 'net_g.npz')
    net_d1_name = os.path.join(save_dir, 'net_d1.npz')
    net_d2_name = os.path.join(save_dir, 'net_d2.npz')

    #Training models #
    iter_counter = 0
    index = np.arange(72)
    for epoch in range(a.epoch):
        np.random.shuffle(index)

        #steps = 0
        for start_index in range(0, 72, a.batch_size):
            end_index = start_index + a.batch_size
            start_time = time.time()

            if start_index < 25 or start_index % 500 == 0:
                critic_num = 25
            else:
                critic_num = CRITIC_NUM

            for _ in range(critic_num):
                # Updates the Discriminator(D)
                summary, errD, _ = sess.run(
                    [merged, d_loss, d_optim],
                    feed_dict={
                        x_l: batch_x_label[index[start_index:end_index]],
                        y_l: batch_y_label[index[start_index:end_index]],
                        z_l: batch_z_label[index[start_index:end_index]],
                        real_images: batch_images[index[start_index:end_index]]
                    })
                sess.run(clip_d_op)

            # Updates the Discriminator(D)
            # summary, errD, _ = sess.run([merged, d_loss, d_optim], feed_dict={x_l: batch_x_label[index[start_index:end_index]],
            #     y_l: batch_y_label[index[start_index:end_index]],z_l: batch_z_label[index[start_index:end_index]],
            #     real_images: batch_images[index[start_index:end_index]]})

            # Updates the Generator(G)
            # run generator twice to make sure that d_loss does not go to zero (different from paper)
            for _ in range(2):
                errG, _ = sess.run(
                    [g_loss, g_optim],
                    feed_dict={
                        x_l: batch_x_label[index[start_index:end_index]],
                        y_l: batch_y_label[index[start_index:end_index]],
                        z_l: batch_z_label[index[start_index:end_index]]
                    })

            end_time = time.time() - start_time
            #print("Epoch: [%2d/%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
            #        % (epoch, FLAGS.epoch, steps, batch_steps, end_time, errD, errG))
            print("Epoch: [%2d/%2d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                    % (epoch, a.epoch, end_time, errD, errG))

            iter_counter += 1
            if np.mod(iter_counter, a.sample_step) == 0:
                # Generate images (what is the difference from feeding a z batch?)
                img, errD, errG = sess.run(
                    [net_g2.outputs, d_loss, g_loss],
                    feed_dict={
                        x_l: sample_x_label,
                        y_l: sample_y_label,
                        z_l: sample_z_label,
                        real_images: sample_image
                    })
                # Visualize generated images
                #tl.visualize.save_images(img, [num_tiles, num_tiles], './{}/train_{:02d}_{:04d}.png'.format(FLAGS.sample_dir, epoch, steps))
                print("[Sample] d_loss: %.8f, g_loss: %.8f" % (errD, errG))

            if np.mod(iter_counter, a.save_step) == 0:
                # Save current network parameters
                print("[*] Saving checkpoints...")
                tl.files.save_npz(net_g.all_params, name=net_g_name, sess=sess)
                tl.files.save_npz(net_d1.all_params,
                                  name=net_d1_name,
                                  sess=sess)
                tl.files.save_npz(net_d2.all_params,
                                  name=net_d2_name,
                                  sess=sess)
                print("[*] Saving checkpoints SUCCESS!")
            writer.add_summary(summary, iter_counter)


    # Reference: https://www.cnblogs.com/Charles-Wan/p/6501945.html

    print("Finished training, starting testing...")

    # Load the 150 test locations (one column each of x, y, z)
    t = pd.read_csv('./location_150.csv')
    t.columns = ["COL1", "COL2", "COL3"]
    x_test = np.array(t[["COL1"]])
    y_test = np.array(t[["COL2"]])
    z_test = np.array(t[["COL3"]])

    generated_images = sess.run(net_g2.outputs,
                                feed_dict={
                                    x_l: x_test,
                                    y_l: y_test,
                                    z_l: z_test,
                                })

    for i in range(150):
        # Map the generator outputs from roughly [-1, 1] to [0, 255]
        generated_images[i] = 128 * generated_images[i] + 127
        # np.clip is not in-place; the original code discarded its result
        generated_images[i] = np.clip(generated_images[i], 0, 255)

        tl.visualize.save_image(
            generated_images[i].astype(np.uint8),
            './{}/train_{:02d}.png'.format(a.sample_dir, i))

    print("testing is finished")
    writer.close()
    sess.close()


def build_one_phase(layerxk, layerzk, Phi, PhiT, Yinput, phase, lambdavalue):
    # params
    lambdaStep = tf.Variable(lambdavalue, dtype=tf.float32)
    eta = 0.95
    xi = 0.95

    softThr = tf.Variable(0.1, dtype=tf.float32)
    t = tf.Variable(1, dtype=tf.float32)
    convSize1 = 64
    convSize2 = 64
    convSize3 = 64
    filterSize1 = 3
    filterSize2 = 3
    filterSize3 = 3

    # get rk from zk
    rk = tf.reduce_sum(tf.multiply(Phi, layerzk[-1]), axis=3)
    rk = tf.reshape(rk, shape=[-1, pixel, pixel, 1])
    rk = tf.subtract(rk, Yinput)
    rk = tf.multiply(PhiT, tf.tile(rk, [1, 1, 1, nFrame]))
    rk = tf.scalar_mul(lambdaStep, rk)
    rk = tf.subtract(layerzk[-1], rk)
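    # The five lines above implement the gradient-descent step of ISTA,
    #   rk = zk - lambdaStep * PhiT * (Phi * zk - y),
    # written with elementwise multiplies and a reduce_sum because Phi/PhiT
    # act as per-pixel sampling masks over the nFrame dimension rather than
    # dense matrices.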

    # F(rk)
    weight0 = get_filter([filterSize1, filterSize1, nFrame, convSize1], 0)
    weight11 = get_filter([filterSize2, filterSize2, convSize1, convSize2], 11)
    weight12 = get_filter([filterSize3, filterSize3, convSize2, convSize3], 12)
    Frk = tf.nn.conv2d(rk, weight0, strides=[1, 1, 1, 1], padding='SAME')
    tmp = Frk  # keep the pre-activation features for the symmetry term below
    Frk = tf.nn.conv2d(Frk, weight11, strides=[1, 1, 1, 1], padding='SAME')
    Frk = tf.nn.relu(Frk)
    Frk = tf.nn.conv2d(Frk, weight12, strides=[1, 1, 1, 1], padding='SAME')

    # soft threshold, soft(F(rk), softThr)
    softFrk = tf.multiply(tf.sign(Frk),
                          tf.nn.relu(tf.subtract(tf.abs(Frk), softThr)))
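    # This is the elementwise soft-thresholding operator
    #   soft(x, thr) = sign(x) * max(|x| - thr, 0),
    # the proximal operator of the L1 norm, with a learnable threshold.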

    # ~F(soft(F(rk), softThr))
    weight13 = get_filter([filterSize3, filterSize3, convSize3, convSize2], 53)
    weight14 = get_filter([filterSize2, filterSize2, convSize2, convSize1], 54)
    weight6 = get_filter([filterSize1, filterSize1, convSize1, nFrame], 6)
    FFrk = tf.nn.conv2d(softFrk,
                        weight13,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    FFrk = tf.nn.relu(FFrk)
    FFrk = tf.nn.conv2d(FFrk, weight14, strides=[1, 1, 1, 1], padding='SAME')
    FFrk = tf.nn.conv2d(FFrk, weight6, strides=[1, 1, 1, 1], padding='SAME')
    # xk = rk + ~F(soft(F(rk), softThr))
    xk = tf.add(rk, FFrk)
    # t is a trainable combination weight (initialized to 1, so zk = xk at
    # the start of training)
    zk = t * xk + (1 - t) * layerxk[-1]
    if phase >= 1:
        delta0 = eta * tf.norm(layerxk[-1] - layerxk[-2])
        delta1 = tf.norm(xk - layerxk[-1])
        # tf.math.less returns a Tensor, so the original comparison
        # `larger == "True"` was always False and lambda was never updated.
        # Decide inside the graph instead: shrink lambda by xi whenever the
        # new residual exceeds eta times the previous one.
        lambdavalue = tf.where(tf.math.less(delta0, delta1),
                               xi * lambdavalue, lambdavalue)
    # Symmetric constraint
    sFFrk = tf.nn.conv2d(Frk, weight13, strides=[1, 1, 1, 1], padding='SAME')
    sFFrk = tf.nn.relu(sFFrk)
    sFFrk = tf.nn.conv2d(sFFrk, weight14, strides=[1, 1, 1, 1], padding='SAME')
    symmetric = sFFrk - tmp
    return xk, zk, symmetric, Frk, lambdavalue
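
A minimal sketch of how a phase function like this is typically unrolled into
the full reconstruction network (the initial estimate xInit and the phase
count nPhase are assumptions, not taken from this file):

layerxk = [xInit]
layerzk = [xInit]
lambdavalue = 1.0
for phase in range(nPhase):
    xk, zk, symmetric, Frk, lambdavalue = build_one_phase(
        layerxk, layerzk, Phi, PhiT, Yinput, phase, lambdavalue)
    layerxk.append(xk)
    layerzk.append(zk)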
Example #21
    def rnn_decoder(self,
                    encode_embed,
                    attention_states,
                    initial_state,
                    cell,
                    num_heads=1,
                    loop_function=None,
                    dtype=dtypes.float32,
                    scope=None,
                    initial_state_attention=False):
        """RNN decoder for the sequence-to-sequence model.

        """
        with tf.variable_scope(scope or "rnn_decoder"):
            batch_size = tf.shape(encode_embed[0])[0]  # Needed for reshaping.
            # number of output vector in sequence
            attn_length = attention_states.get_shape()[1].value
            # the dimension size of each output vector
            attn_size = attention_states.get_shape()[2].value
            # the dimension size of state vector
            state_size = initial_state.get_shape()[1].value
            print(batch_size, attn_length, attn_size, state_size,
                  "batch_size, attn_length, attn_size, state_size")
            # To calculate W1 * h_t we use a 1-by-1 convolution, need to
            # reshape before.
            print(attention_states.get_shape(),
                  "attention_states.get_shape()")  # (?, 9, 186)
            hidden = tf.reshape(attention_states,
                                [-1, attn_length, 1, attn_size])
            hidden_features = []
            hidden_features2 = []
            v = []
            u = []
            linear_w = []
            linear_b = []
            abstract_w = []
            abstract_b = []
            abstract_layers = [
                int((attn_size + state_size) / (2 + 2 * i)) for i in xrange(2)
            ] + [1]
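            # abstract_layers shrinks the concatenated (attn_size + state_size)
            # input by factors of 2 and 4 and ends with a single scalar score:
            # [(attn_size + state_size) // 2, (attn_size + state_size) // 4, 1]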
            # Size of query vectors for attention.
            attention_vec_size = attn_size
            head_weights = []
            for a in xrange(num_heads):
                k = self.get_variable("AttnW_%d" % a,
                                      [1, 1, attn_size, attention_vec_size])
                hidden_features.append(
                    nn_ops.conv2d(hidden, k, [1, 1, 1, 1],
                                  "SAME"))  # [B,T,1,attn_vec_size]
                k2 = self.get_variable("AttnW2_%d" % a,
                                       [1, 1, attn_size, attention_vec_size])
                hidden_features2.append(
                    nn_ops.conv2d(hidden, k2, [1, 1, 1, 1], "SAME"))
                v.append(
                    self.get_variable("AttnV_%d" % a, [attention_vec_size]))
                u.append(
                    self.get_variable("AttnU_%d" % a, [attention_vec_size]))
                head_weights.append(
                    self.get_variable("head_weight_%d" % a, [1]))
                current_layer_size = attn_size + state_size
                linear_w.append(
                    self.get_variable("linearW_%d" % a,
                                      [1, 1, current_layer_size, 1]))
                linear_b.append(self.get_variable("linearB_%d" % a, [1]))
                abstract_w.append([])
                abstract_b.append([])
                for i in xrange(len(abstract_layers)):
                    layer_size = abstract_layers[i]
                    abstract_w[a].append(
                        self.get_variable(
                            "Att_%d_layerW_%d" % (a, i),
                            [1, 1, current_layer_size, layer_size]))
                    abstract_b[a].append(
                        self.get_variable("Att_%d_layerB_%d" % (a, i),
                                          [layer_size]))
                    current_layer_size = layer_size

            def attention(query):
                """Put attention masks on hidden using hidden_features and query."""
                ds = []  # Results of attention reads will be stored here.
                aw = []  # Attention weights will be stored here
                tiled_query = tf.tile(
                    tf.reshape(query, [-1, 1, 1, state_size]),
                    [1, attn_length, 1, 1])
                print(hidden.get_shape(),
                      "hidden.get_shape()")  # (?, 9, 1, 186)
                print(tiled_query.get_shape(),
                      "tiled_query.get_shape()")  # (?, 9, 1, 186)
                concat_input = tf.concat(axis=3, values=[hidden, tiled_query])
                for a in xrange(num_heads):
                    with tf.variable_scope("Attention_%d" % a):
                        s = None
                        if self.hparams.att_strategy == 'multi':
                            print('Attention: multiply')
                            # the third argument is a boolean: whether to
                            # add a bias term or not
                            y = linear(query, attention_vec_size, True)
                            y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                            s = math_ops.reduce_sum(hidden * math_ops.tanh(y),
                                                    [2, 3])

                        elif self.hparams.att_strategy == 'multi_add':
                            print('Attention: multiply_add')
                            y = linear(query,
                                       attention_vec_size,
                                       True,
                                       scope='y')
                            y2 = linear(query,
                                        attention_vec_size,
                                        True,
                                        scope='y2')
                            y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                            y2 = tf.reshape(y2, [-1, 1, 1, attention_vec_size])
                            s = math_ops.reduce_sum(hidden * math_ops.tanh(y2),
                                                    [2, 3])
                            s = s + math_ops.reduce_sum(
                                v[a] * math_ops.tanh(hidden_features[a] + y),
                                [2, 3])

                        elif self.hparams.att_strategy == 'NTN':
                            print('Attention: NTN')
                            y = linear(query, attn_size, False)
                            y = tf.tile(tf.reshape(y, [-1, 1, 1, attn_size]),
                                        [1, attn_length, 1, 1])
                            s = math_ops.reduce_sum(hidden * y,
                                                    [2, 3])  # bilinear term
                            s = s + math_ops.reduce_sum(
                                nn_ops.conv2d(concat_input, linear_w[a],
                                              [1, 1, 1, 1], "SAME"),
                                [2, 3])  # linear
                            s = s + linear_b[a]  # bias
                            # optional extra nonlinearity: s = tf.tanh(s)

                        elif self.hparams.att_strategy == 'elu':
                            print('Attention: elu')

                            cur_input = concat_input
                            # (equivalent to a stack of fully_connected
                            # layers with ELU activations)
                            for i in xrange(len(abstract_layers)):
                                cur_input = nn_ops.conv2d(
                                    cur_input, abstract_w[a][i], [1, 1, 1, 1],
                                    "SAME")
                                cur_input = cur_input + abstract_b[a][i]
                                cur_input = tf.nn.elu(cur_input)
                            s = math_ops.reduce_sum(cur_input, [2, 3])

                        else:
                            print('Attention: add')
                            y = linear(query, attention_vec_size, True)
                            y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                            s = math_ops.reduce_sum(
                                v[a] * math_ops.tanh(hidden_features[a] + y),
                                [2, 3])

                        # weight the raw scores with a learned per-head
                        # weight (used here instead of nn_ops.softmax(s))
                        att = s * head_weights[a]
                        aw.append(att)
                        # Now calculate the attention-weighted vector d.
                        d = math_ops.reduce_sum(
                            tf.reshape(att, [-1, attn_length, 1, 1]) * hidden,
                            [1, 2])
                        ds.append(tf.reshape(d, [-1, attn_size]))
                return aw, ds
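
            # Summary of the scoring strategies implemented in attention()
            # (q = query, h = hidden state being attended to):
            #   'multi'     : s = sum(h * tanh(W q))            multiplicative
            #   'multi_add' : multiplicative term plus the additive
            #                 v * tanh(W_h h + W_q q) term
            #   'NTN'       : bilinear h * (W q) + linear conv2d + bias
            #   default     : s = sum(v * tanh(W_h h + W_q q))  additive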

            state = initial_state
            outputs = []
            prev = None
            batch_attn_size = tf.stack([batch_size, attn_size])
            batch_attw_size = tf.stack([batch_size, attn_length])
            attns = [
                tf.zeros(batch_attn_size, dtype=dtype)
                for _ in xrange(num_heads)
            ]
            attw = [
                1.0 / attn_length * tf.ones(batch_attw_size, dtype=dtype)
                for _ in xrange(num_heads)
            ]
            for a in attns:  # Ensure the second shape of attention vectors is set.
                a.set_shape([None, attn_size])

            # Directly use previous state
            attw, attns = attention(initial_state)
            aw = math_ops.reduce_sum(attw, 0)
            output = tf.scalar_mul(1.0 / float(num_heads), aw)
            output = output - tf.reduce_min(output, 1, keep_dims=True)
            outputs.append(output)

        return outputs, state
Example #22
# ReLU Layer 1 Gradient
dLdZ_1 = tf.multiply(tf.sign(A_1), dLdA_1)
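# Since A_1 = relu(Z_1) is non-negative, tf.sign(A_1) is exactly the ReLU
# derivative: 1 where the pre-activation was positive and 0 elsewhere.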

# Linear Layer 1 Weight Gradients
dLdW_1 = tf.matmul(A_0, tf.transpose(dLdZ_1))
dLdW0_1 = tf.reduce_sum(dLdZ_1, axis=1, keepdims=True)

# Linear Layer 1 Gradient
dLdA_0 = tf.matmul(W_1, dLdZ_1)

################################################################################
# Parameter Update                                                             #
################################################################################

# Linear Layer 1 Weight Updates
W_1_sgd_step = W_1.assign_sub(tf.scalar_mul(0.005, dLdW_1))
W0_1_sgd_step = W0_1.assign_sub(tf.scalar_mul(0.005, dLdW0_1))

# Linear Layer 2 Weight Updates
W_2_sgd_step = W_2.assign_sub(tf.scalar_mul(0.005, dLdW_2))
W0_2_sgd_step = W0_2.assign_sub(tf.scalar_mul(0.005, dLdW0_2))

# Linear Layer 3 Weight Updates
W_3_sgd_step = W_3.assign_sub(tf.scalar_mul(0.005, dLdW_3))
W0_3_sgd_step = W0_3.assign_sub(tf.scalar_mul(0.005, dLdW0_3))

# Grouped
sgd_step = tf.group(W_3_sgd_step, W0_3_sgd_step, 
                    W_2_sgd_step, W0_2_sgd_step, 
                    W_1_sgd_step, W0_1_sgd_step)
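
# A minimal sketch of driving this hand-rolled SGD step in a session (the
# placeholder names A_0 and Y_true and the batch tensors are assumptions):
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     for _ in range(num_steps):
#         sess.run(sgd_step, feed_dict={A_0: x_batch, Y_true: y_batch})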