def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True,
         padding=DEFAULT_PADDING, group=1, biased=True):
    """Build a (possibly grouped) 2-D convolution layer on top of `input`.

    Args:
        input: input tensor; its last dimension is read as the number of
            input channels.
        k_h: kernel height.
        k_w: kernel width.
        c_o: number of output channels.
        s_h: stride along the height axis.
        s_w: stride along the width axis.
        name: variable scope under which the layer variables are created.
        relu: when true, a ReLU is applied to the result.
        padding: padding mode; checked with `self.validate_padding`.
        group: number of channel groups. Must divide both the input and the
            output channel counts.
        biased: when true, a learned bias of size `c_o` is added.

    Returns:
        The output tensor of the layer.
    """
    # Verify that the padding is acceptable.
    self.validate_padding(padding)
    # Number of channels in the input.
    c_i = input.get_shape()[-1]
    # Verify that the grouping parameter is valid.
    assert c_i % group == 0
    assert c_o % group == 0

    def convolve(i, k):
        """Convolve one input tensor with one kernel."""
        return tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

    with tf.variable_scope(name) as scope:
        # Floor division keeps the per-group channel count integral; true
        # division would produce a float (or fail on a Dimension) on Python 3.
        kernel = self.make_var("weights", shape=[k_h, k_w, c_i // group, c_o])
        if group == 1:
            # Common case: convolve the input without further complications.
            output = convolve(input, kernel)
        else:
            # Split input and kernel into groups, convolve each pair
            # independently, then concatenate along the channel axis.
            input_groups = tf.split(3, group, input)
            kernel_groups = tf.split(3, group, kernel)
            output_groups = [convolve(i, k)
                             for i, k in zip(input_groups, kernel_groups)]
            output = tf.concat(3, output_groups)
        if biased:
            biases = self.make_var("biases", [c_o])
            output = tf.nn.bias_add(output, biases)
        if relu:
            output = tf.nn.relu(output, name=scope.name)
        return output
def compute_IOU(bboxA, bboxB):
    """Compute the Intersection Over Union of paired boxes.

    Args:
        bboxA: [N X 4 tensor] format = [left, top, right, bottom]
        bboxB: [N X 4 tensor]

    Return:
        IOU: [N X 1 tensor]
    """
    left_a, top_a, right_a, bottom_a = tf.split(1, 4, bboxA)
    left_b, top_b, right_b, bottom_b = tf.split(1, 4, bboxB)

    # Corners of the candidate intersection rectangle.
    inter_left = tf.maximum(left_a, left_b)
    inter_top = tf.maximum(top_a, top_b)
    inter_right = tf.minimum(right_a, right_b)
    inter_bottom = tf.minimum(bottom_a, bottom_b)

    # 1.0 only when the rectangle has positive extent along both axes,
    # otherwise 0.0, so non-overlapping pairs contribute no area.
    overlaps_x = tf.to_float(tf.less(inter_left, inter_right))
    overlaps_y = tf.to_float(tf.less(inter_top, inter_bottom))
    overlap_flag = overlaps_x * overlaps_y
    inter_extent = tf.mul(inter_right - inter_left, inter_bottom - inter_top)
    overlap_area = tf.mul(overlap_flag, inter_extent)

    # Union = sum of both areas minus the part counted twice.
    area_a = tf.mul(right_a - left_a, bottom_a - top_a)
    area_b = tf.mul(right_b - left_b, bottom_b - top_b)
    union_area = area_a + area_b - overlap_area

    return tf.div(overlap_area, union_area)
def make_example_dict(example_protos, example_weights):
    """Parse example protos into the dictionary of model inputs.

    Args:
        example_protos: sequence of protos exposing `SerializeToString()`.
        example_weights: passed through unchanged as `example_weights`.

    Returns:
        A dict with keys `sparse_features`, `dense_features`,
        `example_weights`, `example_labels` and `example_ids`.
    """
    feature_spec = {
        "target": tf.FixedLenFeature(shape=[1], dtype=tf.float32,
                                     default_value=0),
        "age_indices": tf.VarLenFeature(dtype=tf.int64),
        "age_values": tf.VarLenFeature(dtype=tf.float32),
        "gender_indices": tf.VarLenFeature(dtype=tf.int64),
        "gender_values": tf.VarLenFeature(dtype=tf.float32),
    }
    serialized = [proto.SerializeToString() for proto in example_protos]
    parsed = tf.parse_example(serialized, feature_spec)

    def sparse_column(indices_key, values_key):
        """Build one SparseFeatureColumn from a parsed indices/values pair."""
        index_feature = parsed[indices_key]
        # The first column of the sparse `indices` matrix is kept as the
        # per-entry example index.
        example_index = tf.split(1, 2, index_feature.indices)[0]
        return SparseFeatureColumn(
            tf.reshape(example_index, [-1]),
            tf.reshape(index_feature.values, [-1]),
            tf.reshape(parsed[values_key].values, [-1]),
        )

    sparse_features = [
        sparse_column("age_indices", "age_values"),
        sparse_column("gender_indices", "gender_values"),
    ]
    example_ids = ["%d" % i for i in range(0, len(example_protos))]
    return dict(
        sparse_features=sparse_features,
        dense_features=[],
        example_weights=example_weights,
        example_labels=tf.reshape(parsed["target"], [-1]),
        example_ids=example_ids,
    )
def _composition_function(self, inputs, length, init_state=None):
    """Run the configured recurrent composition over `inputs`.

    The composition is chosen by `self._composition`, one of "GRU", "LSTM"
    or "BiGRU".

    Args:
        inputs: time-major inputs handed to `dynamic_rnn`.
        length: per-example sequence lengths.
        init_state: optional initial state tensor, or None.

    Returns:
        The outputs of the recurrent network (first element of the
        `dynamic_rnn` result). For "BiGRU", a list with the forward and
        backward outputs concatenated per step.

    Raises:
        NotImplementedError: for any other value of `self._composition`.
    """
    # `init_state` must be compared against None explicitly: a tensor cannot
    # be used as a Python boolean.
    has_init_state = init_state is not None

    if self._composition == "GRU":
        cell = GRUCell(self._size)
        return dynamic_rnn(cell, inputs, sequence_length=length,
                           time_major=True, initial_state=init_state,
                           dtype=tf.float32)[0]
    elif self._composition == "LSTM":
        cell = BasicLSTMCell(self._size)
        # The given state is concatenated after an equally sized block of
        # zeros to form the cell's combined state.
        if has_init_state:
            init_state = tf.concat(
                1, [tf.zeros_like(init_state, tf.float32), init_state])
        else:
            init_state = None
        outs = dynamic_rnn(cell, inputs, sequence_length=length,
                           time_major=True, initial_state=init_state,
                           dtype=tf.float32)[0]
        return outs
    elif self._composition == "BiGRU":
        cell = GRUCell(self._size // 2, self._size)
        if has_init_state:
            init_state_fw, init_state_bw = tf.split(1, 2, init_state)
        else:
            init_state_fw, init_state_bw = None, None
        with tf.variable_scope("forward"):
            fw_outs = dynamic_rnn(cell, inputs, sequence_length=length,
                                  time_major=True,
                                  initial_state=init_state_fw,
                                  dtype=tf.float32)[0]
        with tf.variable_scope("backward"):
            # Reverse each sequence up to its own length, run the cell, then
            # reverse the outputs back into the original order.
            rev_inputs = tf.reverse_sequence(tf.pack(inputs), length, 0, 1)
            rev_inputs = [tf.reshape(x, [-1, self._size])
                          for x in tf.split(0, len(inputs), rev_inputs)]
            bw_outs = dynamic_rnn(cell, rev_inputs, sequence_length=length,
                                  time_major=True,
                                  initial_state=init_state_bw,
                                  dtype=tf.float32)[0]
            bw_outs = tf.reverse_sequence(tf.pack(bw_outs), length, 0, 1)
            # NOTE(review): the cell is built with self._size // 2 units but
            # the outputs are reshaped to width self._size - confirm this is
            # intended.
            bw_outs = [tf.reshape(x, [-1, self._size])
                       for x in tf.split(0, len(inputs), bw_outs)]
        return [tf.concat(1, [fw_out, bw_out])
                for fw_out, bw_out in zip(fw_outs, bw_outs)]
    else:
        raise NotImplementedError("Other compositions not implemented yet.")
def build(self):
    """Build the model graph.

    Creates, in order: both generator outputs, the discriminator outputs
    (split into the reference half and the generated half), the weight
    clipping op, the losses, the training ops and the evaluation op.

    :return: None
    """
    with tf.name_scope('G_'):
        self.predict_g = self.__G__()
        self.predict_g2 = self.__G2__()

    with tf.name_scope('D_'):
        # Reference examples: input_d holds real and imaginary values, and
        # the discriminator decides on the reconstructed image.
        self.reconstructed_image_reference = self.get_reconstructed_image(
            real=self.input_d['real'],
            imag=self.input_d['imag'],
            name='Both_gt')

        g2_real = self.predict_g2['real'][:, 0, :, :]
        g2_imag = self.predict_g2['imag'][:, 0, :, :]
        predict_g2_stacked = tf.stack([g2_real, g2_imag], axis=1)

        discriminator_inputs = [self.reconstructed_image_reference,
                                predict_g2_stacked]
        self.predict, self.predict_logits = self.__D__(discriminator_inputs)

        # First half along axis 0 is kept for the discriminator, second half
        # for the generator.
        halves = tf.split(value=self.predict, num_or_size_splits=2, axis=0)
        self.predict_d, self.predict_d_for_g = halves
        logit_halves = tf.split(value=self.predict_logits,
                                num_or_size_splits=2, axis=0)
        self.predict_d_logits, self.predict_d_logits_for_g = logit_halves

        self.clip_weights = self.__clip_weights__()

    with tf.name_scope('loss'):
        self.__loss__()

    with tf.name_scope('training'):
        train_ops = self.__training__(learning_rate=self.FLAGS.learning_rate)
        self.train_op_d, self.train_op_g = train_ops

    with tf.name_scope('evaluation'):
        # Accuracy computed from the first generator's prediction.
        self.evaluation = self.__evaluation__(predict=self.predict_g,
                                              labels=self.labels)
def build(self):
    """Build the model graph.

    Creates, in order: the generator output, the discriminator outputs
    (split into the real half and the generated half), the discriminator
    regularization sum when there are any terms, the losses, the training
    ops and the evaluation op.

    :return: None
    """
    with tf.name_scope('G_'):
        self.predict_g = self.__G__()

    with tf.name_scope('D_'):
        discriminator_inputs = [self.input_d, self.predict_g]
        self.predict, self.predict_logits = self.__D__(discriminator_inputs,
                                                       input_type="Real")

        # First half along axis 0 is kept for the discriminator, second half
        # for the generator.
        halves = tf.split(value=self.predict, num_or_size_splits=2, axis=0)
        self.predict_d, self.predict_d_for_g = halves
        logit_halves = tf.split(value=self.predict_logits,
                                num_or_size_splits=2, axis=0)
        self.predict_d_logits, self.predict_d_logits_for_g = logit_halves

        # The regularization sum is only defined when terms were collected.
        if len(self.regularization_values_d) > 0:
            self.regularization_sum_d = sum(self.regularization_values_d)

    with tf.name_scope('loss'):
        self.__loss__()

    with tf.name_scope('training'):
        train_ops = self.__training__(learning_rate=self.FLAGS.learning_rate)
        self.train_op_d, self.train_op_g = train_ops

    with tf.name_scope('evaluation'):
        # Accuracy computed from the generator's prediction.
        self.evaluation = self.__evaluation__(predict=self.predict_g,
                                              labels=self.labels)
def _g_recurrence_1(i, x_t, input_x, gen_x, h_tm1, h_tm1_manager, last_goal, real_goal, give_num):
    """Advance the manager/worker recurrence by one step while replaying given tokens.

    Reads `self`, `ta_emb_x` and `ta_x` from the enclosing scope.
    NOTE(review): presumably used as a `tf.while_loop` body - confirm at the
    call site.

    Returns a 9-tuple in the same order as the parameters: the incremented
    step index, the next input, `input_x` unchanged, the updated `gen_x`,
    the new worker and manager states, the new `last_goal`, the new
    `real_goal`, and `give_num` unchanged.
    """
    # Keep the first `i` columns of input_x, append the padding array, and
    # cut the result back to `sequence_length` columns.
    cur_sen = \
        tf.split(tf.concat([tf.split(input_x, [i, self.sequence_length - i], 1)[0], self.padding_array], 1),
                 [self.sequence_length, i], 1)[0]
    with tf.variable_scope(self.scope):
        feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
    h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
    sub_goal = self.g_manager_output_unit(h_t_manager)
    # L2-normalise the goal along axis 1.
    sub_goal = tf.nn.l2_normalize(sub_goal, 1)

    # On the very first step (i == 0) the worker state is passed through untouched.
    h_t_Worker = tf.cond(i > 0, lambda: self.g_worker_recurrent_unit(x_t, h_tm1),
                         lambda: h_tm1)  # hidden_memory_tuple

    # Accumulate the new sub goal onto last_goal, except on the first step.
    real_sub_goal = tf.cond(i > 0, lambda: tf.add(last_goal, sub_goal), lambda: real_goal)
    # real_goal_array = real_goal_array.write(i, real_sub_goal)
    # The next input is read from ta_emb_x at position i - 1 (x_t itself on the first step).
    x_tp1 = tf.cond(i > 0, lambda: ta_emb_x.read(i - 1), lambda: x_t)

    # hidden_memory_tuple
    # Copy entry i - 1 of ta_x into gen_x; the control dependency forces cur_sen to be computed first.
    with tf.control_dependencies([cur_sen]):
        gen_x = tf.cond(i > 0, lambda: gen_x.write(i - 1, ta_x.read(i - 1)), lambda: gen_x)
    # Whenever i is a multiple of step_size, last_goal is reset to zeros and
    # real_goal takes the accumulated sub goal; otherwise the accumulation continues.
    return i + 1, x_tp1, input_x, gen_x, h_t_Worker, h_t_manager, \
           tf.cond(((i) % self.step_size) > 0, lambda: real_sub_goal,
                   lambda: tf.constant(0.0, shape=[self.batch_size, self.goal_out_size])), \
           tf.cond(((i) % self.step_size) > 0, lambda: real_goal, lambda: real_sub_goal), give_num
def testSymbolModalityTargetsFactored(self):
    """Check logits and loss shapes of SymbolModality with factored logits."""
    batch_size = 10
    num_datashards = 5
    length = 6
    height = 7
    hidden_size = 9
    vocab_size = 11
    model_hparams = common_hparams.basic_params1()
    model_hparams.factored_logits = True
    model_hparams.hidden_size = hidden_size
    model_hparams.mode = tf.estimator.ModeKeys.TRAIN
    # randint(n) draws from [0, n), the same range as the deprecated
    # `-1 + random_integers(n)` it replaces.
    body_output = np.random.randint(
        100, size=(batch_size, length, height, hidden_size))
    targets = np.random.randint(
        vocab_size, size=(batch_size, length, height, 1))
    m = modalities.SymbolModality(model_hparams, vocab_size)
    data_parallelism = expert_utils.Parallelism(
        ["/device:CPU:0"] * num_datashards)
    with self.test_session() as session:
        sharded_body_output = tf.split(tf.to_float(body_output),
                                       num_datashards)
        sharded_targets = tf.split(targets, num_datashards)
        sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                       data_parallelism)
        train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                    data_parallelism)
        logits = tf.concat(sharded_logits, 0)
        session.run(tf.global_variables_initializer())
        res1, res2 = session.run((logits, train_loss))
    self.assertEqual(res1.shape,
                     (batch_size, length, height, 1, vocab_size))
    self.assertEqual(res2.shape, ())
def test_backward_grads_with_nativepy(self):
    """Compare manually reconstructed input gradients with tape gradients."""
    if not tf.test.is_gpu_available():
        self.skipTest("GPU not available")

    input_shape = (128, 8, 8)
    data_shape = (16,) + input_shape
    x = tf.random_normal(shape=data_shape, dtype=tf.float64)
    dy = tf.random_normal(shape=data_shape, dtype=tf.float64)
    dy_halves = tf.split(dy, num_or_size_splits=2, axis=1)
    block = blocks.RevBlock(
        n_res=3,
        filters=128,
        strides=(1, 1),
        input_shape=input_shape,
        fused=False,
        dtype=tf.float64)

    with tf.GradientTape() as tape:
        tape.watch(x)
        x_halves = tf.split(x, num_or_size_splits=2, axis=1)
        y_halves = block((x_halves[0], x_halves[1]), training=True)
        y = tf.concat((y_halves[0], y_halves[1]), axis=1)

    # Reference gradients straight from the tape.
    expected_dx = tape.gradient(y, x, output_gradients=dy)

    # Gradients obtained through the block's own reconstruction path.
    (dx1, dx2), _ = block.backward_grads(
        x=(x_halves[0], x_halves[1]),
        y=(y_halves[0], y_halves[1]),
        dy=(dy_halves[0], dy_halves[1]),
        training=True)
    actual_dx = tf.concat((dx1, dx2), axis=1)

    tolerance = 1e-5
    abs_error = tf.reshape(abs(actual_dx - expected_dx), [-1])
    assert all(abs_error < tolerance)
def _model_fn(features, labels, mode, params):
    """Wrap the selected model's `model_fn`, with multi-GPU towers in training.

    When `FLAGS.num_gpus > 0` and the mode is TRAIN, every feature and label
    tensor is split into `FLAGS.num_gpus` pieces, one tower is built per GPU
    with shared variables, the tower gradients are averaged and applied, the
    tower predictions are concatenated along axis 0 and the tower losses are
    summed. Otherwise the model is built once on GPU 0.

    Args:
        features: dict of feature tensors.
        labels: dict of label tensors.
        mode: a `learn.ModeKeys` value.
        params: hyper-parameters; `optimizer`, `learning_rate` and
            `decay_steps` are read here.

    Returns:
        A `tf.contrib.learn.ModelFnOps`.
    """
    model_fn = MODELS[FLAGS.model].model_fn
    global_step = tf.train.get_or_create_global_step()

    if FLAGS.num_gpus > 0 and mode == learn.ModeKeys.TRAIN:
        # `.items()` works on both Python 2 and 3; `.iteritems()` only on 2.
        split_features = {k: tf.split(v, FLAGS.num_gpus)
                          for k, v in features.items()}
        split_labels = {k: tf.split(v, FLAGS.num_gpus)
                        for k, v in labels.items()}
        grads = []
        predictions = collections.defaultdict(list)
        losses = []
        opt = ops.create_optimizer(
            params.optimizer, params.learning_rate, params.decay_steps)

        for i in range(FLAGS.num_gpus):
            with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
                with tf.name_scope('tower_%d' % i):
                    # Towers after the first reuse the first tower's variables.
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=i > 0):
                        device_features = {k: v[i]
                                           for k, v in split_features.items()}
                        device_labels = {k: v[i]
                                         for k, v in split_labels.items()}
                        device_predictions, device_loss = model_fn(
                            device_features, device_labels, mode, params)
                        for k, v in device_predictions.items():
                            predictions[k].append(v)
                        if device_loss is not None:
                            losses.append(device_loss)
                        device_grads = opt.compute_gradients(device_loss)
                        grads.append(device_grads)

        grads = ops.average_gradients(grads)
        train_op = opt.apply_gradients(grads, global_step=global_step)
        # Iterate over a snapshot because the values are replaced in place.
        for k, v in list(predictions.items()):
            predictions[k] = tf.concat(v, axis=0)
        loss = tf.add_n(losses) if losses else None
    else:
        with tf.device(tf.DeviceSpec(device_type='GPU', device_index=0)):
            predictions, loss = model_fn(features, labels, mode, params)
            train_op = None
            if mode == learn.ModeKeys.TRAIN:
                opt = ops.create_optimizer(
                    params.optimizer, params.learning_rate,
                    params.decay_steps)
                train_op = opt.minimize(loss, global_step=global_step)

    # A scalar summary cannot be built from a missing loss.
    if loss is not None:
        tf.summary.scalar('loss/loss', loss)

    return tf.contrib.learn.ModelFnOps(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op)
def backward_grads(self, y, dy, training=True):
    """Manually compute backward gradients given input and output grads.

    Args:
        y: block output; split in two halves along `self.axis`.
        dy: gradient with respect to `y`; split the same way.
        training: forwarded to the `f` and `g` sub-networks.

    Returns:
        A tuple `(x, dx, grads)`: the reconstructed block input, the
        gradient with respect to it, and the gradients of the trainable
        variables of `f` followed by those of `g`.
    """
    axis = self.axis
    dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=axis)
    y1, y2 = tf.split(y, num_or_size_splits=2, axis=axis)

    # Differentiate g at y1, weighting its output by dy2.
    with tf.GradientTape() as g_tape:
        g_tape.watch(y1)
        gy1 = self.g(y1, training=training)
    g_sources = [y1] + self.g.trainable_variables
    g_grads = g_tape.gradient(gy1, g_sources, output_gradients=dy2)
    dg = g_grads[1:]
    dx1 = dy1 + g_grads[0]

    # No effect in eager mode; in graph mode it improves memory efficiency.
    with tf.control_dependencies(dg + [dx1]):
        x2 = y2 - gy1

    # Differentiate f at the recovered x2, weighting its output by dx1.
    with tf.GradientTape() as f_tape:
        f_tape.watch(x2)
        fx2 = self.f(x2, training=training)
    f_sources = [x2] + self.f.trainable_variables
    f_grads = f_tape.gradient(fx2, f_sources, output_gradients=dx1)
    df = f_grads[1:]
    dx2 = dy2 + f_grads[0]

    # Same purpose as the control dependency above.
    with tf.control_dependencies(df + [dx2]):
        x1 = y1 - fx2

    x = tf.concat([x1, x2], axis=axis)
    dx = tf.concat([dx1, dx2], axis=axis)
    return x, dx, df + dg
def infer(self, features, *args, **kwargs):  # pylint: disable=arguments-differ
    """Produce predictions from the model.

    Args:
        features: dict of input features, may be empty or None.
        *args: ignored.
        **kwargs: ignored.

    Returns:
        The arg-max over the last axis of the selected part of the logits.
    """
    del args, kwargs
    features = features or {}

    # Inputs of rank below 4 get an extra axis at position 2; the original
    # tensor is remembered so it can be put back afterwards.
    original_inputs = None
    if "inputs" in features and len(features["inputs"].shape) < 4:
        original_inputs = features["inputs"]
        features["inputs"] = tf.expand_dims(original_inputs, 2)

    # Targets start out as zeros with the size of the inputs.
    features["targets"] = tf.zeros_like(features["inputs"])
    self._encode_on_predict = True
    logits = self(features)[0]  # pylint: disable=not-callable

    if self.hparams.gan_loss_factor != 0:
        # Drop the GAN half of the batch.
        logits = tf.split(logits, 2, axis=0)[0]
    # The first half holds the targets, the second the reconstructed inputs;
    # take index 1 instead of 0 to return the reconstructed inputs.
    logits = tf.split(logits, 2, axis=0)[0]
    samples = tf.argmax(logits, axis=-1)

    # Restore the inputs so the Estimator is not confused in edge cases.
    if original_inputs is not None:
        features["inputs"] = original_inputs
    return samples
def call(self, x, mask=None):
    """Execute this layer on input tensors.

    x = [atom_features, atom_mask]

    Parameters
    ----------
    x: list
      Tensors as listed above
    mask: bool, optional
      Ignored. Present only to shadow superclass call() method.

    Returns
    -------
    outputs: Tensor
      Activated affine transform of the atom features kept by the mask
    """
    self.build()
    atom_features = x[0]
    atom_masks = tf.cast(x[1], dtype=tf.bool)

    # One chunk per batch entry, for the features and for the masks.
    feature_chunks = tf.split(atom_features, self.batch_size, axis=0)
    mask_chunks = tf.split(atom_masks, self.batch_size, axis=0)

    # Keep only the masked-in entries of every chunk and stitch the
    # survivors back together.
    kept = [tf.boolean_mask(chunk, chunk_mask)
            for chunk, chunk_mask in zip(feature_chunks, mask_chunks)]
    outputs = tf.concat(kept, axis=0)

    outputs = tf.matmul(outputs, self.W) + self.b
    return self.activation(outputs)
def lnlstm(xs, ms, s, scope, nh, init_scale=1.0):
    """Run an LSTM with layer-normalised projections over a list of steps.

    Args:
        xs: list of per-step inputs; overwritten in place with the per-step
            hidden outputs.
        ms: list of per-step masks; cell and hidden state are multiplied by
            `1 - m` before each step.
        s: initial state, cell and hidden state concatenated along axis 1.
        scope: variable scope for the parameters.
        nh: number of hidden units.
        init_scale: scale given to the orthogonal initializer.

    Returns:
        `(xs, s)`: the list of hidden outputs and the final state.
    """
    nbatch, nin = [v.value for v in xs[0].get_shape()]
    gates_width = nh * 4
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, gates_width],
                             initializer=ortho_init(init_scale))
        gx = tf.get_variable("gx", [gates_width],
                             initializer=tf.constant_initializer(1.0))
        bx = tf.get_variable("bx", [gates_width],
                             initializer=tf.constant_initializer(0.0))

        wh = tf.get_variable("wh", [nh, gates_width],
                             initializer=ortho_init(init_scale))
        gh = tf.get_variable("gh", [gates_width],
                             initializer=tf.constant_initializer(1.0))
        bh = tf.get_variable("bh", [gates_width],
                             initializer=tf.constant_initializer(0.0))

        b = tf.get_variable("b", [gates_width],
                            initializer=tf.constant_initializer(0.0))

        gc = tf.get_variable("gc", [nh],
                             initializer=tf.constant_initializer(1.0))
        bc = tf.get_variable("bc", [nh],
                             initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for step, (x, m) in enumerate(zip(xs, ms)):
        # A mask value of 1 wipes the carried state before this step.
        keep = 1 - m
        c = c * keep
        h = h * keep
        input_part = _ln(tf.matmul(x, wx), gx, bx)
        hidden_part = _ln(tf.matmul(h, wh), gh, bh)
        z = input_part + hidden_part + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)
        c = f * c + i * u
        h = o * tf.tanh(_ln(c, gc, bc))
        xs[step] = h
    s = tf.concat(axis=1, values=[c, h])
    return xs, s
def add_training_loss(self, final_loss, logits):
    """Computes loss using logits.

    Args:
        final_loss: identifier passed to `get_loss_fn` to pick the loss.
        logits: per-task un-softmaxed logits, indexable by task.

    Returns:
        The sum of the per-task losses divided by `self.batch_size`.
    """
    loss_fn = get_loss_fn(final_loss)
    # label_placeholder has shape (batch_size, n_tasks); split it (and the
    # weights) into n_tasks pieces along axis 1.
    task_labels = tf.split(
        axis=1, num_or_size_splits=self.n_tasks, value=self.label_placeholder)
    task_weights = tf.split(
        axis=1, num_or_size_splits=self.n_tasks, value=self.weight_placeholder)

    task_losses = []
    for task in range(self.n_tasks):
        # Flatten to (batch_size,). An axis-less squeeze would also drop the
        # batch axis when batch_size is 1 and yield a scalar label.
        flat_labels = tf.reshape(task_labels[task], [-1])
        # Convert the labels into one-hot vector encodings.
        one_hot_labels = tf.to_float(tf.one_hot(tf.to_int32(flat_labels), 2))
        # The loss receives un-softmaxed logits rather than softmax outputs.
        task_loss = loss_fn(logits[task], one_hot_labels, task_weights[task])
        task_losses.append(task_loss)

    # It's ok to divide by just the batch_size rather than the number of
    # nonzero examples (effect averages out).
    total_loss = tf.add_n(task_losses)
    total_loss = tf.div(total_loss, self.batch_size)
    return total_loss
def remove_channels(x, data_format='NHWC'):
    """Keep only the first three channels of `x`.

    Args:
        x: input tensor, laid out according to `data_format`.
        data_format: 'NCHW' selects axis 1 as the channel axis; any other
            value selects axis 3.

    Returns:
        The tensor made of the first three entries along the channel axis.
    """
    # The shape is queried exactly as before; its components are not used.
    _b, _h, _w, _c = get_conv_shape(x, data_format)
    channel_axis = 1 if data_format == 'NCHW' else 3
    kept, _dropped = tf.split(x, [3, -1], axis=channel_axis)
    return kept
def testSymbolModalityTargets(self):
    """Check logits and loss shapes produced by SymbolModality.top_sharded."""
    batch_size = 10
    num_datashards = 5
    length = 6
    height = 7
    hidden_size = 9
    vocab_size = 11
    model_hparams = tf.contrib.training.HParams(
        symbol_modality_num_shards=4,
        hidden_size=hidden_size,
        label_smoothing=0.2,
        shared_embedding_and_softmax_weights=0)
    # randint(n) draws from [0, n), the same range as the deprecated
    # `-1 + random_integers(n)` it replaces.
    body_output = np.random.randint(
        100, size=(batch_size, length, height, hidden_size))
    targets = np.random.randint(
        vocab_size, size=(batch_size, length, height, 1))
    m = modalities.SymbolModality(model_hparams, vocab_size)
    data_parallelism = expert_utils.Parallelism(
        ["/device:CPU:0"] * num_datashards, reuse=True)
    with self.test_session() as session:
        sharded_body_output = tf.split(tf.to_float(body_output),
                                       num_datashards)
        sharded_targets = tf.split(targets, num_datashards)
        sharded_logits, train_loss = m.top_sharded(
            sharded_body_output, sharded_targets, data_parallelism)
        logits = tf.concat(sharded_logits, 0)
        session.run(tf.global_variables_initializer())
        res1, res2 = session.run((logits, train_loss))
    self.assertEqual(res1.shape,
                     (batch_size, length, height, 1, vocab_size))
    self.assertEqual(res2.shape, ())
def __call__(self, x, state, scope=None):
    """Run one LSTM step.

    Args:
        x: input for this step.
        state: cell state and hidden state concatenated along axis 1, in
            that order.
        scope: optional variable scope; defaults to the class name.

    Returns:
        `(new_h, new_state)` where `new_state` is the new cell state and
        the new hidden state concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
        prev_c, prev_h = tf.split(state, 2, 1)
        gates_width = 4 * self.num_units
        x_size = x.get_shape().as_list()[1]

        # Input and recurrent weights are separate variables so they can use
        # different initializers (default for W_xh, orthogonal for W_hh).
        w_xh = tf.get_variable(
            'W_xh', [x_size, gates_width], initializer=None)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_width],
            initializer=lstm_ortho_initializer(1.0))
        bias = tf.get_variable(
            'bias', [gates_width],
            initializer=tf.constant_initializer(0.0))

        # A single matmul over the stacked inputs and stacked weights.
        stacked_input = tf.concat([x, prev_h], 1)
        stacked_weights = tf.concat([w_xh, w_hh], 0)
        hidden = tf.matmul(stacked_input, stacked_weights) + bias

        # i = input gate, j = new input, f = forget gate, o = output gate.
        i, j, f, o = tf.split(hidden, 4, 1)

        g = tf.tanh(j)
        if self.use_recurrent_dropout:
            g = tf.nn.dropout(g, self.dropout_keep_prob)

        new_c = prev_c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        # The state is returned as one concatenated tensor, not a tuple.
        return new_h, tf.concat([new_c, new_h], 1)
def forward_backward(self, obs_prob_seq):
    """
    runs forward backward algorithm on observation sequence

    Arguments
    ---------
    - obs_prob_seq : matrix that is split into self.N pieces along axis 0,
      one per timestep

    Returns
    -------
    None. The per-timestep pieces are handed to `self._forward` in order
    and to `self._backward` in reversed order.
    """
    num_steps = self.N

    forward_steps = tf.split(0, num_steps, obs_prob_seq)
    with tf.name_scope('forward_belief_propagation'):
        self._forward(forward_steps)

    # Flip along the first axis only, so the backward pass sees the last
    # timestep first.
    reversed_seq = tf.reverse(obs_prob_seq, [True, False])
    backward_steps = tf.split(0, num_steps, reversed_seq)
    with tf.name_scope('backward_belief_propagation'):
        self._backward(backward_steps)
def minibatch(self, dataset, subset, use_datasets, cache_data,
              shift_ratio=-1):
    """Get synthetic image batches.

    Args:
        dataset: only `dataset.num_classes` is read.
        subset: unused.
        use_datasets: unused.
        cache_data: unused.
        shift_ratio: unused.

    Returns:
        `(images_splits, labels_splits)`: two lists with `self.num_splits`
        tensors each.
    """
    del subset, use_datasets, cache_data, shift_ratio

    image_shape = [self.batch_size, self.height, self.width, self.depth]
    random_images = tf.truncated_normal(
        image_shape,
        dtype=self.dtype,
        stddev=1e-1,
        name='synthetic_images')
    random_labels = tf.random_uniform(
        [self.batch_size],
        minval=0,
        maxval=dataset.num_classes - 1,
        dtype=tf.int32,
        name='synthetic_labels')

    # Storing the samples in local variables avoids recomputing the random
    # values, but still results in a H2D copy.
    images = tf.contrib.framework.local_variable(random_images, name='images')
    labels = tf.contrib.framework.local_variable(random_labels, name='labels')

    if self.num_splits == 1:
        return [images], [labels]
    images_splits = tf.split(images, self.num_splits, 0)
    labels_splits = tf.split(labels, self.num_splits, 0)
    return images_splits, labels_splits
def __call__(self, x, state, timestep=0, scope=None):
    """Run one step of the layer-normalised LSTM.

    Args:
        x: input for this step.
        state: hidden state and cell state concatenated along axis 1, in
            that order.
        timestep: unused.
        scope: optional variable scope; defaults to the class name.

    Returns:
        `(new_h, new_state)` where `new_state` is the new hidden state and
        the new cell state concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
        prev_h, prev_c = tf.split(state, 2, 1)

        h_size = self.num_units
        x_shape = x.get_shape().as_list()
        x_size = x_shape[1]
        batch_size = x_shape[0]
        gates_width = 4 * self.num_units

        # Default initializer for the input weights, orthogonal for the
        # recurrent ones.
        w_xh = tf.get_variable(
            'W_xh', [x_size, gates_width], initializer=None)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_width],
            initializer=lstm_ortho_initializer(1.0))

        # A single matmul over the stacked inputs and weights; no bias term
        # is added before the normalisation.
        stacked_input = tf.concat([x, prev_h], 1)
        stacked_weights = tf.concat([w_xh, w_hh], 0)
        projected = tf.matmul(stacked_input, stacked_weights)
        normalised = layer_norm_all(projected, batch_size, 4, h_size, 'ln_all')

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(normalised, 4, 1)

        g = tf.tanh(j)
        if self.use_recurrent_dropout:
            g = tf.nn.dropout(g, self.dropout_keep_prob)

        new_c = prev_c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(layer_norm(new_c, h_size, 'ln_c')) * tf.sigmoid(o)

        return new_h, tf.concat([new_h, new_c], 1)
def _read(self, keys, redundant_states):
    """Read from the redundant states with the given keys.

    The keys and states are combined with `_comp_mul`. When more than one
    copy is stored, the real and imaginary parts are each split into
    `self._num_copies` pieces along axis 1 and averaged.

    Returns:
        The `_comp_mul` result itself, or a `(real, imag)` tuple of the
        averaged parts when `self._num_copies > 1`.
    """
    read = _comp_mul(keys, redundant_states)
    copies = self._num_copies
    if not (copies > 1):
        return read

    real_parts = tf.split(1, copies, _comp_real(read))
    imag_parts = tf.split(1, copies, _comp_imag(read))
    mean_real = tf.add_n(real_parts) / copies
    mean_imag = tf.add_n(imag_parts) / copies
    return (mean_real, mean_imag)
def decode_bbox_target(box_predictions, anchors):
    """Turn box regression outputs into corner boxes relative to anchors.

    Args:
        box_predictions: (..., 4), logits
        anchors: (..., 4), floatbox. Must have the same shape

    Returns:
        box_decoded: (..., 4), float32. With the same shape.
    """
    output_shape = tf.shape(anchors)

    # View every box as two (x, y) pairs: (tx, ty) and (tw, th).
    pred_pairs = tf.reshape(box_predictions, (-1, 2, 2))
    pred_center, pred_log_size = tf.split(pred_pairs, 2, axis=1)
    # each is (...)x1x2
    anchor_pairs = tf.reshape(anchors, (-1, 2, 2))
    anchor_min, anchor_max = tf.split(anchor_pairs, 2, axis=1)

    anchor_size = anchor_max - anchor_min
    anchor_center = (anchor_max + anchor_min) * 0.5

    # Cap the log-size so the exponential stays bounded.
    max_log_size = np.log(config.PREPROC.MAX_SIZE / 16.)
    box_size = tf.exp(tf.minimum(pred_log_size, max_log_size)) * anchor_size
    box_center = pred_center * anchor_size + anchor_center

    half_size = box_size * 0.5
    box_min = box_center - half_size
    box_max = box_center + half_size  # (...)x1x2

    decoded = tf.concat([box_min, box_max], axis=-2)
    return tf.reshape(decoded, output_shape)
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM).

    Args:
        inputs: input tensor for this step.
        state: an `(c, h)` pair when `self._state_is_tuple` is true,
            otherwise `c` and `h` concatenated along axis 1.
        scope: optional variable scope; defaults to "basic_lstm_cell".

    Returns:
        `(h, new_state)`, with `new_state` in the same form as `state`.
    """
    # The scope name (or VariableScope) must be the first argument of
    # tf.variable_scope; passing the cell object there is rejected.
    with tf.variable_scope(scope or "basic_lstm_cell", reuse=self._reuse):
        if self._state_is_tuple:
            c_prev, h_prev = state
        else:
            c_prev, h_prev = tf.split(
                value=state, num_or_size_splits=2, axis=1)

        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        concat = tf.contrib.rnn._linear(
            [inputs, h_prev], 4 * self._num_units, True)

        # i = input_gate, g = new_input, f = forget_gate, o = output_gate
        i, g, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

        c = (c_prev * tf.sigmoid(f + self._forget_bias) +
             tf.sigmoid(i) * tf.tanh(g))
        h = tf.tanh(c) * tf.sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, h)
        else:
            new_state = tf.concat([c, h], 1)
        return h, new_state
def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True,
         padding=DEFAULT_PADDING, group=1):
    """Build a (possibly grouped) biased 2-D convolution layer on `input`.

    Args:
        input: input tensor; its last dimension is read as the number of
            input channels.
        k_h: kernel height.
        k_w: kernel width.
        c_o: number of output channels.
        s_h: stride along the height axis.
        s_w: stride along the width axis.
        name: variable scope under which the layer variables are created.
        relu: when true, a ReLU is applied to the biased result.
        padding: padding mode; checked with `self.validate_padding`.
        group: number of channel groups. Must divide both the input and the
            output channel counts.

    Returns:
        The output tensor of the layer, named after the variable scope.
    """
    self.validate_padding(padding)
    c_i = input.get_shape()[-1]
    assert c_i % group == 0
    assert c_o % group == 0

    def convolve(i, k):
        """Convolve one input tensor with one kernel."""
        return tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

    with tf.variable_scope(name) as scope:
        # Floor division keeps the per-group channel count integral; true
        # division would produce a float (or fail on a Dimension) on Python 3.
        kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
        biases = self.make_var('biases', [c_o])
        if group == 1:
            conv = convolve(input, kernel)
        else:
            # Convolve every input group with its own kernel group, then
            # concatenate the results along the channel axis.
            input_groups = tf.split(3, group, input)
            kernel_groups = tf.split(3, group, kernel)
            output_groups = [convolve(i, k)
                             for i, k in zip(input_groups, kernel_groups)]
            conv = tf.concat(3, output_groups)
        # The biased output is reshaped to the static shape of the
        # convolution result.
        static_shape = conv.get_shape().as_list()
        if relu:
            bias = tf.reshape(tf.nn.bias_add(conv, biases), static_shape)
            return tf.nn.relu(bias, name=scope.name)
        return tf.reshape(tf.nn.bias_add(conv, biases), static_shape,
                          name=scope.name)
def build_network(self):
    """Build the per-class (left, right) tuples and register the tensors.

    Everything is created inside `self.net_graph` on `self.net_device`.
    Adds the keys 'left_right_tuples', 'logits', 'labels' and 'lambs' to
    `self.net_tensors`. The stored 'logits' and 'lambs' are the transformed
    tensors, not the placeholders they were derived from.
    """
    net_tensors = self.net_tensors
    num_classes = self.image_classes
    with self.net_graph.as_default(), tf.device(self.net_device):
        logits = tf.placeholder(dtype=tf.float32,
                                shape=(self.batch_size, num_classes))
        labels = tf.placeholder(dtype=tf.int32, shape=(self.batch_size,))
        lambs = tf.placeholder(dtype=tf.float32, shape=(num_classes,))

        # Apply the sig function to the logits, then put classes on rows.
        logits = tf.transpose(framwork.sig_func(logits))

        # Gather the rows of every class id.
        class_ids = tf.constant(range(num_classes), dtype=tf.int32)
        logits = tf.gather(logits, indices=class_ids)
        lambs = tf.gather(lambs, indices=class_ids)

        # Entry (k, n) is True when example n carries label k.
        class_grid = tf.tile(tf.expand_dims(class_ids, dim=1),
                             [1, self.batch_size])
        label_grid = tf.tile(tf.expand_dims(labels, dim=0), [num_classes, 1])
        is_label = tf.equal(label_grid, class_grid)

        # One piece per class, split along the rows.
        logit_rows = [tf.squeeze(row)
                      for row in tf.split(0, num_classes, logits)]
        indicator_rows = [tf.squeeze(row)
                          for row in tf.split(0, num_classes, is_label)]
        lamb_rows = tf.split(0, num_classes, lambs)

        left_right_tuples = [
            framwork.lamb_func(logit_row, indicator_row, lamb=lamb_row)
            for logit_row, indicator_row, lamb_row
            in zip(logit_rows, indicator_rows, lamb_rows)
        ]

        net_tensors.update({'left_right_tuples': left_right_tuples,
                            'logits': logits,
                            'labels': labels,
                            'lambs': lambs})
def build_loss(self, logits, labels, lambs):
    """Build the summed per-class loss and register intermediate tensors.

    Args:
        logits: logits tensor; transformed with `framwork.sig_func` and
            transposed so that classes are on the rows.
        labels: label tensor, tiled to one row per class.
        lambs: per-class lambda values.

    Side effects:
        Adds 'labels_unique', 'template', 'logits_sig_trans', 'loss' and
        'indict_logic' to `self.net_tensors` and their names to
        `self.tensors_names`.
    """
    # Apply the sig function to the logits, then transpose.
    logits = tf.transpose(framwork.sig_func(logits))

    # Gather the rows of every class id.
    labels_unique = tf.constant(range(self.image_classes), dtype=tf.int32)
    labels_num = self.image_classes
    logits = tf.gather(logits, indices=labels_unique)
    lambs = tf.gather(lambs, indices=labels_unique)

    # Set the value of each row to True where it occurs in labels.
    template = tf.tile(tf.expand_dims(labels_unique, dim=1),
                       [1, self.batch_size])
    labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
    indict_logic = tf.equal(labels_expand, template)

    # Split the tensors along rows, one piece per class.
    logit_list = tf.split(0, labels_num, logits)
    indict_logic_list = tf.split(0, labels_num, indict_logic)
    lambda_list = tf.split(0, self.image_classes, lambs)

    # A real list is required: tf.add_n does not accept the lazy iterator
    # that `map` returns on Python 3.
    loss_list = [
        framwork.loss_func(logit, indicator, lamb)
        for logit, indicator, lamb
        in zip(logit_list, indict_logic_list, lambda_list)
    ]
    loss = tf.add_n(loss_list)

    tensors_dict = {'labels_unique': labels_unique,
                    'template': template,
                    'logits_sig_trans': logits,
                    'loss': loss,
                    'indict_logic': indict_logic}
    self.tensors_names.extend(tensors_dict.keys())
    self.net_tensors.update(tensors_dict)
def call(self, x, h):
    """Run one step of the convolutional GRU.

    Args:
        x: input tensor for this step.
        h: previous hidden state.

    Returns:
        `(h, h)`: the new hidden state, as output and as state.
    """
    feature_axis = self._feature_axis
    channels = x.shape[feature_axis].value
    in_channels = channels + self._filters

    with tf.variable_scope('gates'):
        gate_inputs = tf.concat([x, h], axis=feature_axis)
        gate_channels = 2 * self._filters if self._filters > 1 else 2
        gate_kernel = tf.get_variable(
            'kernel', self._kernel + [in_channels, gate_channels])
        y = tf.nn.convolution(gate_inputs, gate_kernel, 'SAME',
                              data_format=self._data_format)
        if self._normalize:
            # Reset and update gates are layer-normalised separately.
            r, u = tf.split(y, 2, axis=feature_axis)
            r = tf.contrib.layers.layer_norm(r)
            u = tf.contrib.layers.layer_norm(u)
        else:
            # Without normalisation a bias initialised to ones is added.
            y += tf.get_variable('bias', [gate_channels],
                                 initializer=tf.ones_initializer())
            r, u = tf.split(y, 2, axis=feature_axis)
        r = tf.sigmoid(r)
        u = tf.sigmoid(u)

    with tf.variable_scope('candidate'):
        # The reset gate scales the previous state before the convolution.
        candidate_inputs = tf.concat([x, r * h], axis=feature_axis)
        candidate_channels = self._filters
        candidate_kernel = tf.get_variable(
            'kernel', self._kernel + [in_channels, candidate_channels])
        y = tf.nn.convolution(candidate_inputs, candidate_kernel, 'SAME',
                              data_format=self._data_format)
        if self._normalize:
            y = tf.contrib.layers.layer_norm(y)
        else:
            y += tf.get_variable('bias', [candidate_channels],
                                 initializer=tf.zeros_initializer())
        # The update gate blends the previous state with the candidate.
        h = u * h + (1 - u) * self._activation(y)

    return h, h
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with one forget gate per child.

    Args:
        inputs: input tensor for this step.
        state: cell state and hidden state concatenated along axis 1.
        scope: optional variable scope; defaults to the class name.

    Returns:
        `(new_h, new_state)` where `new_state` is the new cell state and
        the new hidden state concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        c, h = tf.split(1, 2, state)
        concat = linear.linear([inputs, h], 4 * self._num_units, True)

        fs = []
        # NOTE(review): `child_states` is not defined inside this block; it
        # has to come from an enclosing or module scope - confirm.
        # This does more work than needed: only the forget gate of each
        # child projection is used.
        for child_state in child_states:
            _, h_k = tf.split(1, 2, child_state)
            # Kept in its own name so the node's own projection in `concat`
            # is not overwritten by the last child's.
            child_concat = linear.linear(
                [inputs, h_k], 4 * self._num_units, True)
            f_k = tf.split(1, 4, child_concat)[2]
            fs.append(f_k)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        # TODO: forget gate for each child, probably need to split by number
        # of child states or something
        i, j, f, o = tf.split(1, 4, concat)

        # If no children just treat it like a regular lstm.
        if not fs:
            fs.append(f)

        # The forget bias is added to every gate separately; adding it to
        # the list of gates itself is a TypeError.
        retained = sum(c * tf.sigmoid(f_k + self._forget_bias) for f_k in fs)
        new_c = retained + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        return new_h, tf.concat(1, [new_c, new_h])
def build_loss(self, out, out_tensor):
    """Build a loss function and accuracy for the model.

    Args:
        out: model outputs handed to `seq2seq.sequence_loss`.
        out_tensor: tensor whose arg-max along axis 2 is compared with the
            targets `self.ts`.

    Returns:
        `(loss, accuracy)`: the sequence loss plus the L2 regularization
        term, and the masked accuracy.
    """
    print(' Building loss and accuracy')

    with tf.variable_scope('accuracy'):
        predicted = tf.to_int32(tf.argmax(out_tensor, 2))
        # Masked-out positions count neither as hits nor in the total.
        hits = tf.to_float(tf.equal(predicted, self.ts)) * self.t_mask
        accuracy = tf.reduce_sum(hits) / tf.reduce_sum(self.t_mask)

    with tf.variable_scope('loss'):
        with tf.variable_scope('split_t_and_mask'):
            # One piece per target position, split along axis 1.
            ts = tf.split(value=self.ts, split_dim=1,
                          num_split=self.max_t_seq_len)
            mask_pieces = tf.split(value=self.t_mask, split_dim=1,
                                   num_split=self.max_t_seq_len)
            t_mask = [tf.squeeze(piece) for piece in mask_pieces]

        loss = seq2seq.sequence_loss(out, ts, t_mask, self.max_t_seq_len)

        with tf.variable_scope('regularization'):
            regularize = tf.contrib.layers.l2_regularizer(self.reg_scale)
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            penalties = [regularize(param) for param in trainable]
            reg_term = sum(penalties)
            loss += reg_term

    return loss, accuracy
def _build_base_rnn(self, inputs, input_seq_lengths, forward_only=True):
    """
    Assemble the language RNN graph: input projection, stacked LSTM with a
    persistent state, output projection and CTC beam-search decoding.

    Parameters
    ----------
    :param inputs: inputs to the RNN; split into ``self.max_input_seq_length`` slices along axis 0
    :param input_seq_lengths: vector containing the length of each input from 'inputs'
    :param forward_only: when False (training graph), dropout placeholders are created and every
        LSTM layer is wrapped in a DropoutWrapper; when True no dropout is added

    Returns
    ----------
    :returns global_step: non-trainable variable, initialised to 0, tracking the learning step
    :returns logits: output-layer activations for each timestep of the input, for each item of the batch
    :returns prediction: first result of the CTC beam search decoder, cast to int32
    :returns rnn_keep_state_op: a tensorflow op to save the RNN internal state for the next batch
    :returns rnn_state_zero_op: a tensorflow op to reset the RNN internal state to zeros
    :returns input_keep_prob_ph: a placeholder for input_keep_prob of the dropout layer
        (None if forward_only is True)
    :returns output_keep_prob_ph: a placeholder for output_keep_prob of the dropout layer
        (None if forward_only is True)
    :returns rnn_tuple_state: the RNN internal state (tuple of LSTMStateTuple of variables)
    """
    # Counter for the learning process step
    global_step = tf.Variable(0, trainable=False, name='global_step')

    with_dropout = not forward_only

    # The keep-probability placeholders only exist in the training graph
    input_keep_prob_ph = None
    output_keep_prob_ph = None
    if with_dropout:
        with tf.name_scope('dropout'):
            # Placeholders so the values can be overridden when running on the test set
            input_keep_prob_ph = tf.placeholder(tf.float32)
            output_keep_prob_ph = tf.placeholder(tf.float32)

    # Cells of the language model, one BasicLSTMCell per layer
    with tf.variable_scope('LSTM'):
        stacked_cells = []
        for _ in range(self.num_layers):
            layer_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_size, state_is_tuple=True)
            if with_dropout:
                with tf.name_scope('dropout'):
                    layer_cell = tf.contrib.rnn.DropoutWrapper(
                        layer_cell,
                        input_keep_prob=input_keep_prob_ph,
                        output_keep_prob=output_keep_prob_ph)
            stacked_cells.append(layer_cell)

        # Wrap the layers in a single multi-layer cell
        cell = tf.contrib.rnn.MultiRNNCell(stacked_cells, state_is_tuple=True)

    # Affine layer between the raw input and the RNN
    with tf.variable_scope('Input_Layer'):
        w_i = tf.get_variable("input_w", [self.input_dim, self.hidden_size], tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        b_i = tf.get_variable("input_b", [self.hidden_size], tf.float32,
                              initializer=tf.constant_initializer(0.0))

    # Project every time slice through the input layer, then stack the list back into one tensor
    time_slices = tf.split(axis=0, num_or_size_splits=self.max_input_seq_length, value=inputs)
    projected = []
    for time_slice in time_slices:
        frame = tf.squeeze(tf.cast(time_slice, tf.float32), axis=[0])
        projected.append(tf.matmul(frame, w_i) + b_i)
    rnn_inputs = tf.stack(projected)

    # Variables holding the RNN state.
    # Note : tensorflow keeps the state inside a batch, but storing it in variables is what allows
    # keeping it between batches (e.g. for live transcript) without fetching and re-feeding it
    # through the session on every run.
    with tf.variable_scope('Hidden_state'):
        # A tuple, so that it can be fed to dynamic_rnn as an initial state
        rnn_tuple_state = tuple(
            tf.nn.rnn_cell.LSTMStateTuple(
                tf.Variable(zero_c, trainable=False),
                tf.Variable(zero_h, trainable=False))
            for zero_c, zero_h in cell.zero_state(self.batch_size, tf.float32))

    # Unroll the RNN
    with tf.name_scope('LSTM'):
        rnn_output, new_states = tf.nn.dynamic_rnn(
            cell, rnn_inputs,
            sequence_length=input_seq_lengths,
            initial_state=rnn_tuple_state,
            time_major=True)

    # Op storing the final state of this batch into the state variables, layer by layer
    keep_ops = []
    for stored, fresh in zip(rnn_tuple_state, new_states):
        keep_ops.append(stored[0].assign(fresh[0]))
        keep_ops.append(stored[1].assign(fresh[1]))
    # tf.tuple only serves to bundle the assignments into one op; its value should not be used
    rnn_keep_state_op = tf.tuple(keep_ops)

    # Op resetting every state variable to zeros
    reset_ops = [
        component.assign(tf.zeros_like(component))
        for stored in rnn_tuple_state
        for component in (stored[0], stored[1])
    ]
    # Same bundling trick as above; the tuple's value should not be used
    rnn_state_zero_op = tf.tuple(reset_ops)

    # Affine layer between the RNN and the char_map
    with tf.variable_scope('Output_layer'):
        w_o = tf.get_variable("output_w", [self.hidden_size, self.num_labels], tf.float32,
                              initializer=tf.contrib.layers.xavier_initializer())
        b_o = tf.get_variable("output_b", [self.num_labels], tf.float32,
                              initializer=tf.constant_initializer(0.0))

    # Logits for each timestep of the input, for each item of the batch
    output_slices = tf.split(axis=0, num_or_size_splits=self.max_input_seq_length, value=rnn_output)
    per_step_logits = []
    for output_slice in output_slices:
        per_step_logits.append(tf.matmul(tf.squeeze(output_slice, axis=[0]), w_o) + b_o)
    logits = tf.stack(per_step_logits)

    # Best "path" for each item of the batch according to the CTC beam search
    decoded, _log_prob = tf.nn.ctc_beam_search_decoder(logits, input_seq_lengths)

    # The prediction is the first decoded path
    prediction = tf.to_int32(decoded[0])

    return (global_step, logits, prediction, rnn_keep_state_op, rnn_state_zero_op,
            input_keep_prob_ph, output_keep_prob_ph, rnn_tuple_state)
def learn(make_env, make_policy, *,
          n_episodes,
          horizon,
          delta,
          gamma,
          max_iters,
          sampler=None,
          use_natural_gradient=False,  # can be 'exact', 'approximate'
          fisher_reg=1e-2,
          iw_method='is',
          iw_norm='none',
          bound='J',
          line_search_type='parabola',
          save_weights=False,
          improvement_tol=0.,
          center_return=False,
          render_after=None,
          max_offline_iters=100,
          callback=None,
          clipping=False,
          entropy='none',
          positive_return=False,
          reward_clustering='none'):
    """Build the importance-weighted objective graph and run the optimization loop.

    Each iteration samples ``n_episodes`` trajectories of at most ``horizon``
    steps, copies the current policy into ``oldpi``, and then maximizes the
    selected bound offline with ``optimize_offline``.

    Args:
        make_env: zero-argument callable returning the environment.
        make_policy: callable ``(name, ob_space, ac_space) -> policy``; called
            for ``'pi'`` and ``'oldpi'``.
        n_episodes: number of episodes per batch.
        horizon: number of steps per episode (``horizon * n_episodes`` samples).
        delta: confidence parameter used in the penalty term of the bounds.
        gamma: discount factor.
        max_iters: the loop stops once the iteration counter reaches this value.
        sampler: object exposing ``collect(theta)``; when None a sequential
            sampler built on ``traj_segment_generator`` is used.
        use_natural_gradient: if truthy, build a Fisher-vector product and
            precondition the gradient with conjugate gradient.
        fisher_reg: damping added to the Fisher-vector product.
        iw_method: ``'is'``, ``'pdis'`` or ``'rbis'``.
        iw_norm: ``'none'``, ``'sn'`` or ``'regression'`` (read for ``'is'`` only).
        bound: ``'J'``, ``'std-d2'``, ``'max-d2'``, ``'max-ess'``, ``'std-ess'``,
            ``'pdis-max-d2'`` or ``'pdis-mean-d2'``.
        line_search_type: ``'binary'`` or ``'parabola'``.
        save_weights: if truthy, log the weights and pickle ``theta`` to
            ``checkpoint.pkl`` every iteration.
        improvement_tol: not read by this function.
        center_return: subtract the mean from episode returns / step rewards.
        render_after: render every ``render_after`` iterations (None disables).
        max_offline_iters: forwarded to ``optimize_offline``.
        callback: called as ``callback(locals(), globals())`` every iteration.
        clipping: clip step rewards to ``[-1, 1]``.
        entropy: ``'none'`` or ``'<scheme>:<v1>:<v2>'`` with scheme ``step``,
            ``lin`` or ``exp``.
        positive_return: not read by this function.
        reward_clustering: ``'none'``, ``'floor'``, ``'ceil'``, ``'floor10'``,
            ``'ceil10'``, ``'floor100'`` or ``'ceil100'``; any other value
            leaves the returns unchanged.

    Raises:
        ValueError: unknown ``line_search_type``.
        NotImplementedError: unknown ``iw_method``, ``iw_norm`` or ``bound``.
        Exception: unknown entropy scheme.
    """
    np.set_printoptions(precision=3)
    max_samples = horizon * n_episodes

    if line_search_type == 'binary':
        line_search = line_search_binary
    elif line_search_type == 'parabola':
        line_search = line_search_parabola
    else:
        raise ValueError()

    # Building the environment
    env = make_env()
    ob_space = env.observation_space
    ac_space = env.action_space

    # Building the policy
    pi = make_policy('pi', ob_space, ac_space)
    oldpi = make_policy('oldpi', ob_space, ac_space)

    all_var_list = pi.get_trainable_variables()
    var_list = [v for v in all_var_list if v.name.split('/')[1].startswith('pol')]

    shapes = [U.intprod(var.get_shape().as_list()) for var in var_list]
    n_parameters = sum(shapes)

    # Placeholders
    ob_ = ob = U.get_placeholder_cached(name='ob')
    ac_ = pi.pdtype.sample_placeholder([max_samples], name='ac')
    mask_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='mask')
    rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='rew')
    disc_rew_ = tf.placeholder(dtype=tf.float32, shape=(max_samples), name='disc_rew')
    clustered_rew_ = tf.placeholder(dtype=tf.float32, shape=(n_episodes))
    gradient_ = tf.placeholder(dtype=tf.float32, shape=(n_parameters, 1), name='gradient')
    iter_number_ = tf.placeholder(dtype=tf.int32, name='iter_number')
    losses_with_name = []

    # Policy densities
    target_log_pdf = pi.pd.logp(ac_)
    behavioral_log_pdf = oldpi.pd.logp(ac_)
    log_ratio = target_log_pdf - behavioral_log_pdf

    # Split operations: one row per episode
    disc_rew_split = tf.stack(tf.split(disc_rew_ * mask_, n_episodes))
    rew_split = tf.stack(tf.split(rew_ * mask_, n_episodes))
    log_ratio_split = tf.stack(tf.split(log_ratio * mask_, n_episodes))
    target_log_pdf_split = tf.stack(tf.split(target_log_pdf * mask_, n_episodes))
    behavioral_log_pdf_split = tf.stack(tf.split(behavioral_log_pdf * mask_, n_episodes))
    mask_split = tf.stack(tf.split(mask_, n_episodes))

    # Renyi divergence
    emp_d2_split = tf.stack(tf.split(pi.pd.renyi(oldpi.pd, 2) * mask_, n_episodes))
    emp_d2_cum_split = tf.reduce_sum(emp_d2_split, axis=1)
    empirical_d2 = tf.reduce_mean(tf.exp(emp_d2_cum_split))

    # Return. The per-episode returns are computed (and optionally clustered)
    # with NumPy in the training loop below and fed through clustered_rew_.
    ep_return = clustered_rew_
    if clipping:
        rew_split = tf.clip_by_value(rew_split, -1, 1)

    if center_return:
        ep_return = ep_return - tf.reduce_mean(ep_return)
        rew_split = rew_split - (tf.reduce_sum(rew_split) / (tf.reduce_sum(mask_split) + 1e-24))

    discounter = [pow(gamma, i) for i in range(0, horizon)]  # Decreasing gamma
    discounter_tf = tf.constant(discounter)
    disc_rew_split = rew_split * discounter_tf

    return_mean = tf.reduce_mean(ep_return)
    return_std = U.reduce_std(ep_return)
    return_max = tf.reduce_max(ep_return)
    return_min = tf.reduce_min(ep_return)
    return_abs_max = tf.reduce_max(tf.abs(ep_return))
    return_step_max = tf.reduce_max(tf.abs(rew_split))  # Max step reward
    return_step_mean = tf.abs(tf.reduce_mean(rew_split))
    positive_step_return_max = tf.maximum(0.0, tf.reduce_max(rew_split))
    negative_step_return_max = tf.maximum(0.0, tf.reduce_max(-rew_split))
    return_step_maxmin = tf.abs(positive_step_return_max - negative_step_return_max)

    losses_with_name.extend([(return_mean, 'InitialReturnMean'),
                             (return_max, 'InitialReturnMax'),
                             (return_min, 'InitialReturnMin'),
                             (return_std, 'InitialReturnStd'),
                             (empirical_d2, 'EmpiricalD2'),
                             (return_step_max, 'ReturnStepMax'),
                             (return_step_maxmin, 'ReturnStepMaxmin')])

    # NOTE(review): `iw`/`iwn` exist only for iw_method == 'is', the ESS
    # quantities only for 'is'/'rbis', and `d2_w_0t` only for 'pdis'. Bounds,
    # the 'exp' entropy scheme and the natural gradient that read them are
    # therefore usable only with the matching iw_method.
    if iw_method == 'pdis':
        # log_ratio_split cumulative sum
        log_ratio_cumsum = tf.cumsum(log_ratio_split, axis=1)
        # Exponentiate
        ratio_cumsum = tf.exp(log_ratio_cumsum)
        # Multiply by the step-wise reward (not episode)
        ratio_reward = ratio_cumsum * disc_rew_split
        # Average on episodes
        ratio_reward_per_episode = tf.reduce_sum(ratio_reward, axis=1)
        w_return_mean = tf.reduce_sum(ratio_reward_per_episode, axis=0) / n_episodes
        # Get d2(w0:t) with mask
        d2_w_0t = tf.exp(tf.cumsum(emp_d2_split, axis=1)) * mask_split  # LEAVE THIS OUTSIDE
        # Sum d2(w0:t) over timesteps
        episode_d2_0t = tf.reduce_sum(d2_w_0t, axis=1)
        # Sample variance
        J_sample_variance = (1/(n_episodes-1)) * tf.reduce_sum(
            tf.square(ratio_reward_per_episode - w_return_mean))
        losses_with_name.append((J_sample_variance, 'J_sample_variance'))
        losses_with_name.extend([(tf.reduce_max(ratio_cumsum), 'MaxIW'),
                                 (tf.reduce_min(ratio_cumsum), 'MinIW'),
                                 (tf.reduce_mean(ratio_cumsum), 'MeanIW'),
                                 (U.reduce_std(ratio_cumsum), 'StdIW')])
        losses_with_name.extend([(tf.reduce_max(d2_w_0t), 'MaxD2w0t'),
                                 (tf.reduce_min(d2_w_0t), 'MinD2w0t'),
                                 (tf.reduce_mean(d2_w_0t), 'MeanD2w0t'),
                                 (U.reduce_std(d2_w_0t), 'StdD2w0t')])

    elif iw_method == 'is':
        iw = tf.exp(tf.reduce_sum(log_ratio_split, axis=1))
        if iw_norm == 'none':
            iwn = iw / n_episodes
            w_return_mean = tf.reduce_sum(iwn * ep_return)
            J_sample_variance = (1/(n_episodes-1)) * tf.reduce_sum(
                tf.square(iw * ep_return - w_return_mean))
            losses_with_name.append((J_sample_variance, 'J_sample_variance'))
        elif iw_norm == 'sn':
            iwn = iw / tf.reduce_sum(iw)
            w_return_mean = tf.reduce_sum(iwn * ep_return)
        elif iw_norm == 'regression':
            iwn = iw / n_episodes
            mean_iw = tf.reduce_mean(iw)
            beta = tf.reduce_sum((iw - mean_iw) * ep_return * iw) / (
                tf.reduce_sum((iw - mean_iw) ** 2) + 1e-24)
            w_return_mean = tf.reduce_mean(iw * ep_return - beta * (iw - 1))
        else:
            raise NotImplementedError()
        ess_classic = tf.linalg.norm(iw, 1) ** 2 / tf.linalg.norm(iw, 2) ** 2
        sqrt_ess_classic = tf.linalg.norm(iw, 1) / tf.linalg.norm(iw, 2)
        ess_renyi = n_episodes / empirical_d2
        losses_with_name.extend([(tf.reduce_max(iwn), 'MaxIWNorm'),
                                 (tf.reduce_min(iwn), 'MinIWNorm'),
                                 (tf.reduce_mean(iwn), 'MeanIWNorm'),
                                 (U.reduce_std(iwn), 'StdIWNorm'),
                                 (tf.reduce_max(iw), 'MaxIW'),
                                 (tf.reduce_min(iw), 'MinIW'),
                                 (tf.reduce_mean(iw), 'MeanIW'),
                                 (U.reduce_std(iw), 'StdIW'),
                                 (ess_classic, 'ESSClassic'),
                                 (ess_renyi, 'ESSRenyi')])

    elif iw_method == 'rbis':
        # Get pdfs for episodes
        target_log_pdf_episode = tf.reduce_sum(target_log_pdf_split, axis=1)
        behavioral_log_pdf_episode = tf.reduce_sum(behavioral_log_pdf_split, axis=1)
        # Normalize the log-probabilities to avoid overflows as much as possible
        normalization_factor = tf.reduce_mean(
            tf.stack([target_log_pdf_episode, behavioral_log_pdf_episode]))
        target_norm_log_pdf_episode = target_log_pdf_episode - normalization_factor
        behavioral_norm_log_pdf_episode = behavioral_log_pdf_episode - normalization_factor
        # Exponentiate
        target_pdf_episode = tf.clip_by_value(
            tf.cast(tf.exp(target_norm_log_pdf_episode), tf.float64), 1e-300, 1e+300)
        behavioral_pdf_episode = tf.clip_by_value(
            tf.cast(tf.exp(behavioral_norm_log_pdf_episode), tf.float64), 1e-300, 1e+300)
        tf.add_to_collection('asserts', tf.assert_positive(
            target_pdf_episode, name='target_pdf_positive'))
        tf.add_to_collection('asserts', tf.assert_positive(
            behavioral_pdf_episode, name='behavioral_pdf_positive'))
        # Compute the merging matrix (reward-clustering) and the number of clusters
        reward_unique, reward_indexes = tf.unique(ep_return)
        episode_clustering_matrix = tf.cast(tf.one_hot(reward_indexes, n_episodes), tf.float64)
        max_index = tf.reduce_max(reward_indexes) + 1
        trajectories_per_cluster = tf.reduce_sum(episode_clustering_matrix, axis=0)[:max_index]
        tf.add_to_collection('asserts', tf.assert_positive(
            tf.reduce_sum(episode_clustering_matrix, axis=0)[:max_index],
            name='clustering_matrix'))
        # Get the clustered pdfs
        clustered_target_pdf = tf.matmul(
            tf.reshape(target_pdf_episode, (1, -1)), episode_clustering_matrix)[0][:max_index]
        clustered_behavioral_pdf = tf.matmul(
            tf.reshape(behavioral_pdf_episode, (1, -1)), episode_clustering_matrix)[0][:max_index]
        tf.add_to_collection('asserts', tf.assert_positive(
            clustered_target_pdf, name='clust_target_pdf_positive'))
        tf.add_to_collection('asserts', tf.assert_positive(
            clustered_behavioral_pdf, name='clust_behavioral_pdf_positive'))
        # Compute the J, weighting every cluster by its cardinality
        ratio_clustered = clustered_target_pdf / clustered_behavioral_pdf
        ratio_reward = (tf.cast(ratio_clustered, tf.float32) * reward_unique
                        * tf.cast(trajectories_per_cluster, tf.float32))
        w_return_mean = tf.reduce_sum(ratio_reward) / tf.cast(n_episodes, tf.float32)
        # Divergences
        ess_classic = tf.linalg.norm(ratio_reward, 1) ** 2 / tf.linalg.norm(ratio_reward, 2) ** 2
        sqrt_ess_classic = tf.linalg.norm(ratio_reward, 1) / tf.linalg.norm(ratio_reward, 2)
        ess_renyi = n_episodes / empirical_d2
        # Summaries
        losses_with_name.extend([(tf.reduce_max(ratio_clustered), 'MaxIW'),
                                 (tf.reduce_min(ratio_clustered), 'MinIW'),
                                 (tf.reduce_mean(ratio_clustered), 'MeanIW'),
                                 (U.reduce_std(ratio_clustered), 'StdIW'),
                                 (1-(max_index / n_episodes), 'RewardCompression'),
                                 (ess_classic, 'ESSClassic'),
                                 (ess_renyi, 'ESSRenyi')])
    else:
        raise NotImplementedError()

    if bound == 'J':
        bound_ = w_return_mean
    elif bound == 'std-d2':
        bound_ = w_return_mean - tf.sqrt((1 - delta) / (delta * ess_renyi)) * return_std
    elif bound == 'max-d2':
        var_estimate = tf.sqrt((1 - delta) / (delta * ess_renyi)) * return_abs_max
        bound_ = w_return_mean - var_estimate
    elif bound == 'max-ess':
        bound_ = w_return_mean - tf.sqrt((1 - delta) / delta) / sqrt_ess_classic * return_abs_max
    elif bound == 'std-ess':
        bound_ = w_return_mean - tf.sqrt((1 - delta) / delta) / sqrt_ess_classic * return_std
    elif bound in ('pdis-max-d2', 'pdis-mean-d2'):
        # Both PDIS bounds share the discounted d2 term and differ only in the
        # step-reward statistic that scales the penalty.
        # Discount factor
        if gamma >= 1:
            discounter = [float(1+2*(horizon-t-1)) for t in range(0, horizon)]
        else:
            def f(t):
                return pow(gamma, 2*t) + (
                    2*pow(gamma, t)*(pow(gamma, t+1) - pow(gamma, horizon))) / (1-gamma)
            discounter = [f(t) for t in range(0, horizon)]
        discounter_tf = tf.constant(discounter)
        mean_episode_d2 = tf.reduce_sum(d2_w_0t, axis=0) / (tf.reduce_sum(mask_split, axis=0) + 1e-24)
        discounted_d2 = mean_episode_d2 * discounter_tf  # Discounted d2
        discounted_total_d2 = tf.reduce_sum(discounted_d2, axis=0)  # Sum over time
        step_scale = return_step_max if bound == 'pdis-max-d2' else return_step_mean
        bound_ = w_return_mean - tf.sqrt(
            (1-delta) * discounted_total_d2 / (delta*n_episodes)) * step_scale
    else:
        raise NotImplementedError()

    # Policy entropy for exploration
    ent = pi.pd.entropy()
    meanent = tf.reduce_mean(ent)
    losses_with_name.append((meanent, 'MeanEntropy'))
    # Add policy entropy bonus
    if entropy != 'none':
        scheme, v1, v2 = entropy.split(':')
        if scheme == 'step':
            entcoeff = tf.cond(iter_number_ < int(v2), lambda: float(v1), lambda: float(0.0))
            losses_with_name.append((entcoeff, 'EntropyCoefficient'))
            entbonus = entcoeff * meanent
            bound_ = bound_ + entbonus
        elif scheme == 'lin':
            ip = tf.cast(iter_number_ / max_iters, tf.float32)
            entcoeff_decay = tf.maximum(0.0, float(v2) + (float(v1) - float(v2)) * (1.0 - ip))
            losses_with_name.append((entcoeff_decay, 'EntropyCoefficient'))
            entbonus = entcoeff_decay * meanent
            bound_ = bound_ + entbonus
        elif scheme == 'exp':
            ent_f = tf.exp(-tf.abs(tf.reduce_mean(iw) - 1) * float(v2)) * float(v1)
            losses_with_name.append((ent_f, 'EntropyCoefficient'))
            bound_ = bound_ + ent_f * meanent
        else:
            raise Exception('Unrecognized entropy scheme.')

    losses_with_name.append((w_return_mean, 'ReturnMeanIW'))
    losses_with_name.append((bound_, 'Bound'))
    losses, loss_names = map(list, zip(*losses_with_name))

    if use_natural_gradient:
        p = tf.placeholder(dtype=tf.float32, shape=[None])
        target_logpdf_episode = tf.reduce_sum(target_log_pdf_split * mask_split, axis=1)
        grad_logprob = U.flatgrad(tf.stop_gradient(iwn) * target_logpdf_episode, var_list)
        dot_product = tf.reduce_sum(grad_logprob * p)
        hess_logprob = U.flatgrad(dot_product, var_list)
        # The operator is invoked as compute_linear_operator(x, *args) with the
        # same 7-tuple `args` as the other functions, so its inputs must list
        # the same placeholders in the same order after `p`.
        compute_linear_operator = U.function(
            [p, ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
            [-hess_logprob])

    assign_old_eq_new = U.function([], [], updates=[
        tf.assign(oldv, newv)
        for (oldv, newv) in zipsame(oldpi.get_variables(), pi.get_variables())])

    assert_ops = tf.group(*tf.get_collection('asserts'))
    print_ops = tf.group(*tf.get_collection('prints'))

    compute_lossandgrad = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        losses + [U.flatgrad(bound_, var_list), assert_ops, print_ops])
    compute_grad = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        [U.flatgrad(bound_, var_list), assert_ops, print_ops])
    compute_bound = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        [bound_, assert_ops, print_ops])
    compute_losses = U.function(
        [ob_, ac_, rew_, disc_rew_, clustered_rew_, mask_, iter_number_],
        losses)

    set_parameter = U.SetFromFlat(var_list)
    get_parameter = U.GetFlat(var_list)

    if sampler is None:
        seg_gen = traj_segment_generator(pi, env, n_episodes, horizon, stochastic=True)
        sampler = type("SequentialSampler", (object,),
                       {"collect": lambda self, _: seg_gen.__next__()})()

    U.initialize()

    # Starting optimizing
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=n_episodes)
    rewbuffer = deque(maxlen=n_episodes)

    while True:
        iters_so_far += 1

        if render_after is not None and iters_so_far % render_after == 0:
            if hasattr(env, 'render'):
                render(env, pi, horizon)

        if callback:
            callback(locals(), globals())

        if iters_so_far >= max_iters:
            print('Finised...')
            break

        logger.log('********** Iteration %i ************' % iters_so_far)

        theta = get_parameter()

        with timed('sampling'):
            seg = sampler.collect(theta)

        add_disc_rew(seg, gamma)

        lens, rets = seg['ep_lens'], seg['ep_rets']
        lenbuffer.extend(lens)
        rewbuffer.extend(rets)
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)

        # Get clustered reward: per-episode discounted return, optionally
        # rounded so that episodes with close returns fall in the same cluster
        reward_matrix = np.reshape(seg['disc_rew'] * seg['mask'], (n_episodes, horizon))
        ep_reward = np.sum(reward_matrix, axis=1)
        if reward_clustering == 'none':
            pass
        elif reward_clustering == 'floor':
            ep_reward = np.floor(ep_reward)
        elif reward_clustering == 'ceil':
            ep_reward = np.ceil(ep_reward)
        elif reward_clustering == 'floor10':
            ep_reward = np.floor(ep_reward * 10) / 10
        elif reward_clustering == 'ceil10':
            ep_reward = np.ceil(ep_reward * 10) / 10
        elif reward_clustering == 'floor100':
            ep_reward = np.floor(ep_reward * 100) / 100
        elif reward_clustering == 'ceil100':
            ep_reward = np.ceil(ep_reward * 100) / 100

        args = ob, ac, rew, disc_rew, clustered_rew, mask, iter_number = (
            seg['ob'], seg['ac'], seg['rew'], seg['disc_rew'], ep_reward,
            seg['mask'], iters_so_far)

        assign_old_eq_new()

        def evaluate_loss():
            loss = compute_bound(*args)
            return loss[0]

        def evaluate_gradient():
            gradient = compute_grad(*args)
            return gradient[0]

        if use_natural_gradient:
            def evaluate_fisher_vector_prod(x):
                return compute_linear_operator(x, *args)[0] + fisher_reg * x

            def evaluate_natural_gradient(g):
                return cg(evaluate_fisher_vector_prod, g, cg_iters=10, verbose=0)
        else:
            evaluate_natural_gradient = None

        with timed('summaries before'):
            logger.record_tabular("Iteration", iters_so_far)
            logger.record_tabular("InitialBound", evaluate_loss())
            logger.record_tabular("EpLenMean", np.mean(lenbuffer))
            logger.record_tabular("EpRewMean", np.mean(rewbuffer))
            logger.record_tabular("EpThisIter", len(lens))
            logger.record_tabular("EpisodesSoFar", episodes_so_far)
            logger.record_tabular("TimestepsSoFar", timesteps_so_far)
            logger.record_tabular("TimeElapsed", time.time() - tstart)

        if save_weights:
            logger.record_tabular('Weights', str(get_parameter()))
            import pickle
            # Context manager so the checkpoint is flushed and the handle is
            # released on every iteration instead of being leaked.
            with open('checkpoint.pkl', 'wb') as checkpoint_file:
                pickle.dump(theta, checkpoint_file)

        with timed("offline optimization"):
            theta, improvement = optimize_offline(theta,
                                                  set_parameter,
                                                  line_search,
                                                  evaluate_loss,
                                                  evaluate_gradient,
                                                  evaluate_natural_gradient,
                                                  max_offline_ite=max_offline_iters)

        set_parameter(theta)

        with timed('summaries after'):
            meanlosses = np.array(compute_losses(*args))
            for (lossname, lossval) in zip(loss_names, meanlosses):
                logger.record_tabular(lossname, lossval)

        logger.dump_tabular()

    env.close()
def transpose_coordinates(self):
    """Swap the y/x ordering of the four coordinate columns, in place.

    The tensor returned by ``self.get()`` is split into four columns along
    axis 1, read as ``[y_min, x_min, y_max, x_max]``; they are re-concatenated
    as ``[x_min, y_min, x_max, y_max]`` and stored back with ``self.set``.
    """
    with tf.name_scope('transpose_coordinates'):
        columns = tf.split(value=self.get(), num_or_size_splits=4, axis=1)
        first_y, first_x, second_y, second_x = columns
        swapped = tf.concat([first_x, first_y, second_x, second_y], 1)
        self.set(swapped)
def main():
    """Train a point-cloud segmentation network, validating periodically.

    Parses the command line, creates a run folder under ``--save_folder``,
    redirects stdout to ``log.txt`` inside it, imports the model and setting
    modules by name, builds the TensorFlow graph and runs the training loop.
    Every ``step_val`` batches (and on the last batch) a checkpoint is saved
    and the whole validation set is evaluated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--filelist', '-t', help='Path to training set ground truth (.txt)', required=True)
    parser.add_argument('--filelist_val', '-v', help='Path to validation set ground truth (.txt)', required=True)
    parser.add_argument('--load_ckpt', '-l', help='Path to a check point file for load')
    parser.add_argument('--save_folder', '-s', help='Path to folder for saving check points and summary',
                        required=True)
    parser.add_argument('--model', '-m', help='Model to use', required=True)
    parser.add_argument('--setting', '-x', help='Setting to use', required=True)
    args = parser.parse_args()

    time_string = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    root_folder = os.path.join(
        args.save_folder, '%s_%s_%d_%s' % (args.model, args.setting, os.getpid(), time_string))
    os.makedirs(root_folder, exist_ok=True)

    # Deliberate process-wide redirect: everything printed from here on goes
    # to the run's log file, which stays open for the lifetime of the process.
    sys.stdout = open(os.path.join(root_folder, 'log.txt'), 'w')

    print('PID:', os.getpid())

    print(args)

    model = importlib.import_module(args.model)
    setting_path = os.path.join(os.path.dirname(__file__), args.model)
    sys.path.append(setting_path)
    setting = importlib.import_module(args.setting)

    num_epochs = setting.num_epochs
    batch_size = setting.batch_size
    sample_num = setting.sample_num
    step_val = 500
    num_parts = setting.num_parts
    label_weights_list = setting.label_weights
    scaling_range = setting.scaling_range
    scaling_range_val = setting.scaling_range_val
    jitter = setting.jitter
    jitter_val = setting.jitter_val

    # Prepare inputs
    print('{}-Preparing datasets...'.format(datetime.now()))
    data_train, _, data_num_train, label_train = data_utils.load_seg(args.filelist)
    data_val, _, data_num_val, label_val = data_utils.load_seg(args.filelist_val)

    # shuffle
    data_train, data_num_train, label_train = \
        data_utils.grouped_shuffle([data_train, data_num_train, label_train])

    num_train = data_train.shape[0]
    point_num = data_train.shape[1]
    num_val = data_val.shape[0]
    print('{}-{:d}/{:d} training/validation samples.'.format(datetime.now(), num_train, num_val))
    batch_num = (num_train * num_epochs + batch_size - 1) // batch_size
    print('{}-{:d} training batches.'.format(datetime.now(), batch_num))

    # Per-class loss weights, indexed by label to get per-point weights
    label_weights = np.array(label_weights_list)

    ######################################################################
    # Placeholders
    indices = tf.placeholder(tf.int32, shape=(None, None, 2), name="indices")
    xforms = tf.placeholder(tf.float32, shape=(None, 3, 3), name="xforms")
    rotations = tf.placeholder(tf.float32, shape=(None, 3, 3), name="rotations")
    jitter_range = tf.placeholder(tf.float32, shape=(1), name="jitter_range")
    global_step = tf.Variable(0, trainable=False, name='global_step')
    is_training = tf.placeholder(tf.bool, name='is_training')

    pts_fts = tf.placeholder(tf.float32, shape=(None, point_num, setting.data_dim), name='pts_fts')
    labels_seg = tf.placeholder(tf.int32, shape=(None, point_num), name='labels_seg')
    labels_weights = tf.placeholder(tf.float32, shape=(None, point_num), name='labels_weights')

    ######################################################################
    features_augmented = None
    if setting.data_dim > 3:
        points, features = tf.split(pts_fts, [3, setting.data_dim - 3], axis=-1,
                                    name='split_points_features')
        if setting.use_extra_features:
            features_sampled = tf.gather_nd(features, indices=indices, name='features_sampled')
            if setting.with_normal_feature:
                if setting.data_dim < 6:
                    print('Only 3D normals are supported!')
                    sys.exit()
                elif setting.data_dim == 6:
                    features_augmented = pf.augment(features_sampled, rotations)
                else:
                    # Split along the feature axis (tf.split defaults to axis 0),
                    # matching the split of pts_fts above and the concat below.
                    normals, rest = tf.split(features_sampled, [3, setting.data_dim - 6], axis=-1)
                    normals_augmented = pf.augment(normals, rotations)
                    features_augmented = tf.concat([normals_augmented, rest], axis=-1)
            else:
                features_augmented = features_sampled
    else:
        points = pts_fts
    points_sampled = tf.gather_nd(points, indices=indices, name='points_sampled')
    points_augmented = pf.augment(points_sampled, xforms, jitter_range)

    labels_sampled = tf.gather_nd(labels_seg, indices=indices, name='labels_sampled')
    labels_weights_sampled = tf.gather_nd(labels_weights, indices=indices, name='labels_weight_sampled')

    net = model.Net(points_augmented, features_augmented, num_parts, is_training, setting)
    logits, probs = net.logits, net.probs

    loss_op = tf.losses.sparse_softmax_cross_entropy(labels=labels_sampled, logits=logits,
                                                     weights=labels_weights_sampled)
    t_1_acc_op = pf.top_1_accuracy(probs, labels_sampled)

    _ = tf.summary.scalar('loss/train_seg', tensor=loss_op, collections=['train'])
    _ = tf.summary.scalar('t_1_acc/train_seg', tensor=t_1_acc_op, collections=['train'])

    # Validation averages are computed in Python and fed back for the summary
    loss_val_avg = tf.placeholder(tf.float32)
    t_1_acc_val_avg = tf.placeholder(tf.float32)
    _ = tf.summary.scalar('loss/val_seg', tensor=loss_val_avg, collections=['val'])
    _ = tf.summary.scalar('t_1_acc/val_seg', tensor=t_1_acc_val_avg, collections=['val'])

    lr_exp_op = tf.train.exponential_decay(setting.learning_rate_base, global_step, setting.decay_steps,
                                           setting.decay_rate, staircase=True)
    lr_clip_op = tf.maximum(lr_exp_op, setting.learning_rate_min)
    _ = tf.summary.scalar('learning_rate', tensor=lr_clip_op, collections=['train'])
    reg_loss = setting.weight_decay * tf.losses.get_regularization_loss()
    if setting.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=lr_clip_op, epsilon=setting.epsilon)
    elif setting.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=lr_clip_op, momentum=0.9, use_nesterov=True)
    else:
        # Fail here with a clear message instead of a NameError further down.
        raise ValueError('Unsupported optimizer: {!r}'.format(setting.optimizer))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss_op + reg_loss, global_step=global_step)

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    saver = tf.train.Saver(max_to_keep=None)

    # backup this file, model and setting
    shutil.copy(__file__, os.path.join(root_folder, os.path.basename(__file__)))
    shutil.copy(os.path.join(os.path.dirname(__file__), args.model + '.py'),
                os.path.join(root_folder, args.model + '.py'))
    os.makedirs(os.path.join(root_folder, args.model), exist_ok=True)
    shutil.copy(os.path.join(setting_path, args.setting + '.py'),
                os.path.join(root_folder, args.model, args.setting + '.py'))

    folder_ckpt = os.path.join(root_folder, 'ckpts')
    os.makedirs(folder_ckpt, exist_ok=True)

    folder_summary = os.path.join(root_folder, 'summary')
    os.makedirs(folder_summary, exist_ok=True)

    parameter_num = np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()])
    print('{}-Parameter number: {:d}.'.format(datetime.now(), parameter_num))

    with tf.Session() as sess:
        summaries_op = tf.summary.merge_all('train')
        summaries_val_op = tf.summary.merge_all('val')
        summary_writer = tf.summary.FileWriter(folder_summary, sess.graph)

        sess.run(init_op)

        # Load the model
        if args.load_ckpt is not None:
            saver.restore(sess, args.load_ckpt)
            print('{}-Checkpoint loaded from {}!'.format(datetime.now(), args.load_ckpt))

        for batch_idx in range(batch_num):
            if (batch_idx != 0 and batch_idx % step_val == 0) or batch_idx == batch_num - 1:
                ######################################################################
                # Validation
                filename_ckpt = os.path.join(folder_ckpt, 'iter')
                saver.save(sess, filename_ckpt, global_step=global_step)
                print('{}-Checkpoint saved to {}!'.format(datetime.now(), filename_ckpt))

                losses = []
                t_1_accs = []
                for batch_val_idx in range(math.ceil(num_val / batch_size)):
                    start_idx = batch_size * batch_val_idx
                    end_idx = min(start_idx + batch_size, num_val)
                    batch_size_val = end_idx - start_idx
                    points_batch = data_val[start_idx:end_idx, ...]
                    points_num_batch = data_num_val[start_idx:end_idx, ...]
                    labels_batch = label_val[start_idx:end_idx, ...]
                    weights_batch = label_weights[labels_batch]

                    xforms_np, rotations_np = pf.get_xforms(batch_size_val, scaling_range=scaling_range_val)
                    _, loss_val, t_1_acc_val = \
                        sess.run([update_ops, loss_op, t_1_acc_op],
                                 feed_dict={
                                     pts_fts: points_batch,
                                     indices: pf.get_indices(batch_size_val, sample_num, points_num_batch, False),
                                     xforms: xforms_np,
                                     rotations: rotations_np,
                                     jitter_range: np.array([jitter_val]),
                                     labels_seg: labels_batch,
                                     labels_weights: weights_batch,
                                     is_training: False,
                                 })
                    # Weight by batch size so the final average is per sample
                    losses.append(loss_val * batch_size_val)
                    t_1_accs.append(t_1_acc_val * batch_size_val)
                    print('{}-[Val ]-Iter: {:06d} Loss: {:.4f} T-1 Acc: {:.4f}'.format(
                        datetime.now(), batch_val_idx, loss_val, t_1_acc_val))
                    sys.stdout.flush()

                loss_avg = sum(losses) / num_val
                t_1_acc_avg = sum(t_1_accs) / num_val
                summaries_val = sess.run(summaries_val_op,
                                         feed_dict={
                                             loss_val_avg: loss_avg,
                                             t_1_acc_val_avg: t_1_acc_avg,
                                         })
                summary_writer.add_summary(summaries_val, batch_idx)
                print('{}-[Val ]-Average: Loss: {:.4f} T-1 Acc: {:.4f}'.format(
                    datetime.now(), loss_avg, t_1_acc_avg))
                sys.stdout.flush()
                ######################################################################

            ######################################################################
            # Training
            start_idx = (batch_size * batch_idx) % num_train
            end_idx = min(start_idx + batch_size, num_train)
            batch_size_train = end_idx - start_idx
            points_batch = data_train[start_idx:end_idx, ...]
            points_num_batch = data_num_train[start_idx:end_idx, ...]
            labels_batch = label_train[start_idx:end_idx, ...]
            weights_batch = label_weights[labels_batch]

            # Reshuffle once the end of the training set has been reached
            if start_idx + batch_size_train == num_train:
                data_train, data_num_train, label_train = \
                    data_utils.grouped_shuffle([data_train, data_num_train, label_train])

            # Randomly vary the number of sampled points, clamped to +/- sample_num // 4
            offset = int(random.gauss(0, sample_num // 8))
            offset = max(offset, -sample_num // 4)
            offset = min(offset, sample_num // 4)
            sample_num_train = sample_num + offset
            xforms_np, rotations_np = pf.get_xforms(batch_size_train, scaling_range=scaling_range)
            _, loss, t_1_acc, summaries = \
                sess.run([train_op, loss_op, t_1_acc_op, summaries_op],
                         feed_dict={
                             pts_fts: points_batch,
                             indices: pf.get_indices(batch_size_train, sample_num_train, points_num_batch),
                             xforms: xforms_np,
                             rotations: rotations_np,
                             jitter_range: np.array([jitter]),
                             labels_seg: labels_batch,
                             labels_weights: weights_batch,
                             is_training: True,
                         })
            summary_writer.add_summary(summaries, batch_idx)
            print('{}-[Train]-Iter: {:06d} Loss: {:.4f} T-1 Acc: {:.4f}'.format(
                datetime.now(), batch_idx, loss, t_1_acc))
            sys.stdout.flush()
            ######################################################################
        print('{}-Done!'.format(datetime.now()))
def __init__(self,
             embedding_mat,
             non_static,
             hidden_unit,
             sequence_length,
             max_pool_size,
             num_classes,
             embedding_size,
             filter_sizes,
             num_filters,
             l2_reg_lambda=0.0):
    """Build the CNN -> GRU classification graph.

    The graph embeds ``input_x``, runs one padded convolution + max-pool
    branch per entry in ``filter_sizes``, concatenates the pooled feature
    maps along the channel axis, feeds the resulting sequence to a GRU and
    classifies the GRU output selected by ``real_len``.

    Args:
        embedding_mat: Initial embedding matrix handed to ``tf.constant`` /
            ``tf.Variable``.
        non_static: If true the embedding is a trainable ``tf.Variable``,
            otherwise a ``tf.constant``.
        hidden_unit: Number of GRU units.
        sequence_length: Static length of every input sequence.
        max_pool_size: Pooling window and stride along the sequence axis.
        num_classes: Number of output classes.
        embedding_size: Width of one embedding vector.
        filter_sizes: Iterable of convolution heights (in tokens).
        num_filters: Number of filters per convolution branch.
        l2_reg_lambda: Weight of the L2 penalty on the output layer.

    Attributes created: the placeholders ``input_x``, ``input_y``,
    ``dropout_keep_prob``, ``batch_size``, ``pad`` and ``real_len``, plus
    ``embedded_chars``, ``W``, ``scores``, ``predictions``, ``loss``,
    ``accuracy`` and ``num_correct``.
    """
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                  name='input_x')
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name='input_y')
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name='dropout_keep_prob')
    self.batch_size = tf.placeholder(tf.int32, [])
    self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                              name='pad')
    self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

    l2_loss = tf.constant(0.0)

    with tf.device('/cpu:0'), tf.name_scope('embedding'):
        if not non_static:
            embedding_W = tf.constant(embedding_mat, name='W')
        else:
            embedding_W = tf.Variable(embedding_mat, name='W')
        self.embedded_chars = tf.nn.embedding_lookup(embedding_W,
                                                     self.input_x)
        emb = tf.expand_dims(self.embedded_chars, -1)

    pooled_outputs = []
    # Sequence length after max pooling with stride max_pool_size ('SAME').
    reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))

    for filter_size in filter_sizes:
        with tf.name_scope('conv-maxpool-%s' % filter_size):
            # Pad along the sequence axis so that the 'VALID' convolution
            # output keeps sequence_length rows. A single concat over the
            # whole list also works when one side needs no padding
            # (filter sizes 1 and 2), where concatenating an empty list
            # of pads would fail.
            num_prio = (filter_size - 1) // 2
            num_post = (filter_size - 1) - num_prio
            emb_pad = tf.concat(
                [self.pad] * num_prio + [emb] + [self.pad] * num_post, 1)

            filter_shape = [filter_size, embedding_size, 1, num_filters]
            conv_W = tf.Variable(
                tf.truncated_normal(filter_shape, stddev=0.1), name='W')
            conv_b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                 name='b')
            conv = tf.nn.conv2d(emb_pad,
                                conv_W,
                                strides=[1, 1, 1, 1],
                                padding='VALID',
                                name='conv')

            h = tf.nn.relu(tf.nn.bias_add(conv, conv_b), name='relu')

            # Max pooling over the sequence axis.
            pooled = tf.nn.max_pool(h,
                                    ksize=[1, max_pool_size, 1, 1],
                                    strides=[1, max_pool_size, 1, 1],
                                    padding='SAME',
                                    name='pool')
            pooled = tf.reshape(pooled, [-1, reduced, num_filters])
            pooled_outputs.append(pooled)

    # Stack the branches along the feature axis.
    pooled_concat = tf.concat(pooled_outputs, 2)
    pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

    lstm_cell = tf.contrib.rnn.GRUCell(num_units=hidden_unit)
    lstm_cell = tf.contrib.rnn.DropoutWrapper(
        lstm_cell, output_keep_prob=self.dropout_keep_prob)

    self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
    # static_rnn expects a Python list with one [batch, features] tensor
    # per time step.
    inputs = [
        tf.squeeze(input_, [1]) for input_ in tf.split(
            pooled_concat, num_or_size_splits=int(reduced), axis=1)
    ]
    outputs, _ = tf.contrib.rnn.static_rnn(
        lstm_cell,
        inputs,
        initial_state=self._initial_state,
        sequence_length=self.real_len)

    # Select, per example, outputs[real_len - 1]: step i replaces the
    # running output only while real_len >= i + 1.
    output = outputs[0]
    with tf.variable_scope('Output'):
        tf.get_variable_scope().reuse_variables()
        one = tf.ones([1, hidden_unit], tf.float32)
        for i in range(1, len(outputs)):
            ind = self.real_len < (i + 1)
            ind = tf.to_float(ind)
            ind = tf.expand_dims(ind, -1)
            mat = tf.matmul(ind, one)
            output = tf.add(tf.multiply(output, mat),
                            tf.multiply(outputs[i], 1.0 - mat))

    with tf.name_scope('output'):
        self.W = tf.Variable(tf.truncated_normal(
            [hidden_unit, num_classes], stddev=0.1),
                             name='W')
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')
        # Regularise the output layer itself. The previous code passed the
        # stale local `W` (the last convolution filter) here.
        l2_loss += tf.nn.l2_loss(self.W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(output, self.W, b, name='scores')
        self.predictions = tf.argmax(self.scores, 1, name='predictions')

    with tf.name_scope('loss'):
        # Only named arguments are accepted by this op.
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.input_y, logits=self.scores)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    with tf.name_scope('accuracy'):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name='accuracy')

    with tf.name_scope('num_correct'):
        correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.num_correct = tf.reduce_sum(tf.cast(correct, 'float'))
# a specific format. Read more at: https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn all_inputs = tf.concat([tf.expand_dims(t, 0) for t in train_inputs], axis=0) # all_outputs is [seq_length, batch_size, num_nodes] all_lstm_outputs, state = tf.nn.dynamic_rnn(drop_multi_cell, all_inputs, initial_state=tuple(initial_state), time_major=True, dtype=tf.float32) all_lstm_outputs = tf.reshape(all_lstm_outputs, [batch_size * num_unrollings, num_nodes[-1]]) all_outputs = tf.nn.xw_plus_b(all_lstm_outputs, w, b) split_outputs = tf.split(all_outputs, num_unrollings, axis=0) # When calculating the loss you need to be careful about the exact form, because you calculate # loss of all the unrolled steps at the same time # Therefore, take the mean error or each batch and get the sum of that over all the unrolled steps print('Defining training Loss') loss = 0.0 with tf.control_dependencies( [tf.assign(c[li], state[li][0]) for li in range(n_layers)] + [tf.assign(h[li], state[li][1]) for li in range(n_layers)]): for ui in range(num_unrollings): loss += tf.reduce_mean(0.5 * (split_outputs[ui] - train_outputs[ui])**2) print('Learning rate decay operations')
def G_synthesis_co_mod_gan(
        dlatents_in,  # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
        images_in,  # Input images; shape is fixed below to [minibatch, num_channels, resolution, resolution].
        masks_in,  # Input masks; shape is fixed below to [minibatch, 1, resolution, resolution].
        dlatent_size=512,  # Disentangled latent (W) dimensionality.
        num_channels=3,  # Number of output color channels.
        resolution=1024,  # Output resolution.
        fmap_base=16 << 10,  # Overall multiplier for the number of feature maps.
        fmap_decay=1.0,  # log2 feature map reduction when doubling the resolution.
        fmap_min=1,  # Minimum number of feature maps in any layer.
        fmap_max=512,  # Maximum number of feature maps in any layer.
        randomize_noise=True,  # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
        architecture='skip',  # Architecture: 'orig', 'skip', 'resnet'.
        nonlinearity='lrelu',  # Activation function: 'relu', 'lrelu', etc.
        dtype='float32',  # Data type to use for activations and outputs.
        resample_kernel=[
            1, 3, 3, 1
        ],  # Low-pass filter to apply when resampling activations. None = no filtering.
        fused_modconv=True,  # Implement modulated_conv2d_layer() as a single fused op?
        is_training=False,  # Network is under training? Not read by any active statement in this function.
        pix2pix=False,  # True = split images_in into two halves along channels and condition on the second half.
        dropout_rate=0.5,  # Value passed as the second argument of tf.nn.dropout on the encoder's global vector.
        cond_mod=True,  # True = append the encoder's global vector to every modulation vector.
        style_mod=True,  # True = append the per-layer dlatent to every modulation vector.
        noise_injection=True,  # True = add scaled noise after every modulated convolution.
        **_kwargs):  # Ignore unrecognized keyword args.
    """Build the co-modulated synthesis network (encoder + decoder).

    An encoder downsamples the conditioning input from `resolution` to 4x4,
    storing one feature map per resolution in `E_features` and producing a
    global vector `x_global`. A decoder then upsamples back to `resolution`
    with modulated convolutions whose modulation vector is the concatenation
    of the per-layer dlatent (if `style_mod`) and `x_global` (if `cond_mod`),
    adding the stored encoder features as skip connections.

    Returns a tensor named 'images_out': in pix2pix mode the generated image
    concatenated with the conditioning half along the channel axis, otherwise
    `y * (1 - masks_in) + images_in * masks_in`.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Feature-map count for a stage, clipped to [fmap_min, fmap_max].
        return np.clip(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_min,
                       fmap_max)

    assert architecture in ['orig', 'skip', 'resnet']
    act = nonlinearity
    num_layers = resolution_log2 * 2 - 2
    images_out = None

    images_in.set_shape([None, num_channels, resolution, resolution])
    masks_in.set_shape([None, 1, resolution, resolution])
    images_in = tf.cast(images_in, dtype)
    masks_in = tf.cast(masks_in, dtype)

    def E_fromrgb(x, y, res):  # res = 2..resolution_log2
        # 1x1 convolution lifting the conditioning input `y` to nf(res - 1)
        # feature maps; added to `x` when `x` is given.
        with tf.variable_scope('FromRGB'):
            t = apply_bias_act(conv2d_layer(y, fmaps=nf(res - 1), kernel=1),
                               act=act)
            return t if x is None else x + t

    def E_block(x, res, E_features):  # res = 2..resolution_log2
        # One encoder stage: a 3x3 convolution whose output is stored in
        # E_features[res], followed by a 3x3 convolution with down=True.
        with tf.variable_scope('Conv0'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res - 1), kernel=3),
                               act=act)
            E_features[res] = x
        with tf.variable_scope('Conv1_down'):
            x = apply_bias_act(conv2d_layer(x,
                                            fmaps=nf(res - 2),
                                            kernel=3,
                                            down=True,
                                            resample_kernel=resample_kernel),
                               act=act)
        return x

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)

    # Noise inputs.
    # One non-trainable noise variable per synthesis layer; only read when
    # randomize_noise is False.
    noise_inputs = []
    for layer_idx in range(num_layers - 1):
        res = (layer_idx + 5) // 2
        shape = [1, 1, 2**res, 2**res]
        noise_inputs.append(
            tf.get_variable('noise%d' % layer_idx,
                            shape=shape,
                            initializer=tf.initializers.random_normal(),
                            trainable=False))

    # Main layers.
    # Encoder: runs from full resolution down to 8x8, storing skip features.
    E_features = {}
    x = None
    if pix2pix:
        # images_in carries two stacked images along the channel axis; the
        # second half is the condition and the output has half the channels.
        num_channels = num_channels // 2
        _, y = tf.split(images_in, 2, axis=1)
        cond = y
    else:
        # Condition on the shifted mask and on the image multiplied by the mask.
        y = tf.concat([masks_in - 0.5, images_in * masks_in], axis=1)
    for res in range(resolution_log2, 2, -1):
        with tf.variable_scope('E_%dx%d' % (2**res, 2**res)):
            if res == resolution_log2:
                x = E_fromrgb(x, y, res)
            x = E_block(x, res, E_features)

    # Final layers.
    with tf.variable_scope('E_4x4'):
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
            E_features[2] = x
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(1) * 2), act=act)
            # NOTE(review): the `is_training` guard below is commented out, so
            # dropout is applied unconditionally. Also, in TF 1.x the second
            # positional argument of tf.nn.dropout is keep_prob rather than a
            # drop rate; confirm this is intended.
            # if is_training:
            x = tf.nn.dropout(x, dropout_rate)
            x_global = x

    # Single convolution layer with all the bells and whistles.
    def layer(x, layer_idx, fmaps, kernel, up=False):
        # Modulation vector = [dlatent for this layer, encoder global vector],
        # each part included only if its switch is enabled.
        mod_vector = []
        if style_mod:
            mod_vector.append(dlatents_in[:, layer_idx])
        if cond_mod:
            mod_vector.append(x_global)
        x = modulated_conv2d_layer(
            x,
            tf.concat(mod_vector, axis=1) if mod_vector else None,
            fmaps=fmaps,
            kernel=kernel,
            up=up,
            resample_kernel=resample_kernel,
            fused_modconv=fused_modconv)
        if noise_injection:
            if randomize_noise:
                noise = tf.random_normal(
                    [tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
            else:
                noise = tf.cast(noise_inputs[layer_idx], x.dtype)
            # Scalar strength initialised to zero.
            noise_strength = tf.get_variable(
                'noise_strength',
                shape=[],
                initializer=tf.initializers.zeros())
            x += noise * tf.cast(noise_strength, x.dtype)
        return apply_bias_act(x, act=act)

    # Building blocks for main layers.
    def block(x, res, E_features):  # res = 3..resolution_log2
        # One decoder stage: upsampling convolution, encoder skip addition,
        # second convolution, plus an optional residual branch.
        x_skip = E_features[res]
        t = x
        with tf.variable_scope('Conv0_up'):
            x = layer(x,
                      layer_idx=res * 2 - 5,
                      fmaps=nf(res - 1),
                      kernel=3,
                      up=True)
            x = x + x_skip
        with tf.variable_scope('Conv1'):
            x = layer(x, layer_idx=res * 2 - 4, fmaps=nf(res - 1), kernel=3)
        if architecture == 'resnet':
            with tf.variable_scope('Skip'):
                t = conv2d_layer(t,
                                 fmaps=nf(res - 1),
                                 kernel=1,
                                 up=True,
                                 resample_kernel=resample_kernel)
                # Scale the sum of the two branches by 1/sqrt(2).
                x = (x + t) * (1 / np.sqrt(2))
        return x

    def upsample(y):
        with tf.variable_scope('Upsample'):
            return upsample_2d(y, k=resample_kernel)

    def torgb(x, y, res):  # res = 2..resolution_log2
        # 1x1 modulated convolution (demodulate=False) to num_channels,
        # accumulated onto `y` when `y` is given.
        mod_vector = []
        if style_mod:
            mod_vector.append(dlatents_in[:, res * 2 - 3])
        if cond_mod:
            mod_vector.append(x_global)
        with tf.variable_scope('ToRGB'):
            t = apply_bias_act(
                modulated_conv2d_layer(
                    x,
                    tf.concat(mod_vector, axis=1) if mod_vector else None,
                    fmaps=num_channels,
                    kernel=1,
                    demodulate=False,
                    fused_modconv=fused_modconv))
            return t if y is None else y + t

    # Early layers.
    y = None
    with tf.variable_scope('G_4x4'):
        with tf.variable_scope('Dense'):
            # Expand the global vector to a 4x4 feature map and add the
            # encoder's 4x4 features.
            x = apply_bias_act(dense_layer(x, fmaps=nf(1) * 4 * 4), act=act)
            x = tf.reshape(x, [-1, nf(1), 4, 4])
            x = x + E_features[2]
        with tf.variable_scope('Conv'):
            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)
        if architecture == 'skip':
            y = torgb(x, y, 2)

    # Main layers.
    for res in range(3, resolution_log2 + 1):
        with tf.variable_scope('G_%dx%d' % (2**res, 2**res)):
            x = block(x, res, E_features)
            if architecture == 'skip':
                y = upsample(y)
            if architecture == 'skip' or res == resolution_log2:
                y = torgb(x, y, res)

    if pix2pix:
        images_out = tf.concat([y, cond], axis=1)
    else:
        # Blend: generated content weighted by (1 - mask), input image
        # weighted by mask.
        images_out = y * (1 - masks_in) + images_in * masks_in

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
def build(self):
    """Build the RPN graph and return the tensors to evaluate.

    Steps, in order: set up placeholders and feature extractors, optionally
    apply path drop to the two bottleneck feature maps, crop-and-resize both
    maps at the anchor boxes, fuse the crops ('mean' or 'concat'), predict
    objectness and regression offsets with conv layers, decode and NMS the
    anchors, and build the mini-batch ground-truth tensors.

    Returns:
        dict keyed by the self.PRED_* constants. In 'train'/'val' mode it
        holds anchors, the mini-batch mask, masked predictions and ground
        truth, and the post-NMS proposals; otherwise only the top anchors
        and their objectness softmax scores.

    Raises:
        ValueError: if self._fusion_method is neither 'mean' nor 'concat'.
    """
    # Setup input placeholders
    self._set_up_input_pls()

    # Setup feature extractors
    self._set_up_feature_extractors()

    bev_proposal_input = self.bev_bottleneck
    img_proposal_input = self.img_bottleneck

    # Divisor for 'mean' fusion; 2.0 is a plain average of the two inputs.
    fusion_mean_div_factor = 2.0

    # If both img and bev probabilities are set to 1.0, don't do
    # path drop.
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):
        with tf.variable_scope('rpn_path_drop'):

            random_values = tf.random_uniform(shape=[3],
                                              minval=0.0,
                                              maxval=1.0)

            img_mask, bev_mask = self.create_path_drop_masks(
                self._path_drop_probabilities[0],
                self._path_drop_probabilities[1], random_values)

            img_proposal_input = tf.multiply(img_proposal_input, img_mask)

            bev_proposal_input = tf.multiply(bev_proposal_input, bev_mask)

            self.img_path_drop_mask = img_mask
            self.bev_path_drop_mask = bev_mask

            # Overwrite the division factor
            fusion_mean_div_factor = img_mask + bev_mask

    with tf.variable_scope('proposal_roi_pooling'):

        with tf.variable_scope('box_indices'):

            def get_box_indices(boxes):
                # Returns a flat int32 vector in which every box is tagged
                # with the index of its position along dimension 0 of `boxes`.
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    # Fall back to the dynamic shape when the static one is
                    # not fully known.
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(
                self._bev_anchors_norm_pl, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV
        bev_proposal_rois = tf.image.crop_and_resize(
            bev_proposal_input, self._bev_anchors_norm_pl, tf_box_indices,
            self._proposal_roi_crop_size)
        # Do ROI Pooling on image
        img_proposal_rois = tf.image.crop_and_resize(
            img_proposal_input, self._img_anchors_norm_pl, tf_box_indices,
            self._proposal_roi_crop_size)
        # NOTE(review): leftover debug output; it prints only this constant
        # label every time the graph is built.
        print("img_proposal_rois shape")
        # print(img_proposal_rois.shape)
        # for i in range(img_proposal_rois.shape[0]):
        #     print(img_proposal_rois[i])

    ####################################################################################
    # TODO PROJECT: insert code here to add mixture of experts
    # self._moe_model = MoeModel(img_proposal_input, bev_proposal_input)
    # self._moe_model._set_up_input_pls()
    # moe_prediction = self._moe_model.build()
    ####################################################################################

    with tf.variable_scope('proposal_roi_fusion'):
        rpn_fusion_out = None
        ####################################################################################
        # TODO PROJECT: weight the feature before average img and bev
        # weighted_img_proposal_rois = tf.multiply(moe_prediction['img_weight'],img_proposal_rois)
        # weighted_bev_proposal_rois = tf.multiply(moe_prediction['bev_weight'],bev_proposal_rois)
        ####################################################################################
        if self._fusion_method == 'mean':
            tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
            rpn_fusion_out = tf.divide(tf_features_sum,
                                       fusion_mean_div_factor)
            ####################################################################################
            # TODO PROJECT: weight the feature before average img and bev
            # tf_features_sum = tf.add(weighted_bev_proposal_rois, weighted_img_proposal_rois)
            # rpn_fusion_out = tf.divide(tf_features_sum, fusion_mean_div_factor)
            ####################################################################################
        elif self._fusion_method == 'concat':
            rpn_fusion_out = tf.concat(
                [bev_proposal_rois, img_proposal_rois], axis=3)
            ####################################################################################
            # TODO PROJECT: weight the feature before concatenation
            # rpn_fusion_out = tf.concat(
            #     [weighted_bev_proposal_rois, weighted_img_proposal_rois], axis=3)
            ####################################################################################
        else:
            raise ValueError('Invalid fusion method', self._fusion_method)

    # TODO: move this section into a separate AnchorPredictor class
    with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
        tensor_in = rpn_fusion_out

        # Parse rpn layers config
        layers_config = self._config.layers_config.rpn_config
        l2_weight_decay = layers_config.l2_weight_decay

        # Only attach an L2 regularizer when a positive decay is configured.
        if l2_weight_decay > 0:
            weights_regularizer = slim.l2_regularizer(l2_weight_decay)
        else:
            weights_regularizer = None

        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=weights_regularizer):
            # Classification head.
            # Use conv2d instead of fully_connected layers.
            cls_fc6 = slim.conv2d(tensor_in,
                                  layers_config.cls_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='cls_fc6')

            cls_fc6_drop = slim.dropout(cls_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc6_drop')

            cls_fc7 = slim.conv2d(cls_fc6_drop,
                                  layers_config.cls_fc7, [1, 1],
                                  scope='cls_fc7')

            cls_fc7_drop = slim.dropout(cls_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc7_drop')

            # Two output channels, no activation.
            cls_fc8 = slim.conv2d(cls_fc7_drop,
                                  2, [1, 1],
                                  activation_fn=None,
                                  scope='cls_fc8')

            # Drop the two spatial axes (1 and 2).
            objectness = tf.squeeze(cls_fc8, [1, 2],
                                    name='cls_fc8/squeezed')

            # Regression head.
            # Use conv2d instead of fully_connected layers.
            reg_fc6 = slim.conv2d(tensor_in,
                                  layers_config.reg_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='reg_fc6')

            reg_fc6_drop = slim.dropout(reg_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc6_drop')

            reg_fc7 = slim.conv2d(reg_fc6_drop,
                                  layers_config.reg_fc7, [1, 1],
                                  scope='reg_fc7')

            reg_fc7_drop = slim.dropout(reg_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc7_drop')

            # Six output channels, no activation.
            reg_fc8 = slim.conv2d(reg_fc7_drop,
                                  6, [1, 1],
                                  activation_fn=None,
                                  scope='reg_fc8')

            offsets = tf.squeeze(reg_fc8, [1, 2], name='reg_fc8/squeezed')

    # Histogram summaries
    with tf.variable_scope('histograms_feature_extractor'):
        with tf.variable_scope('bev_vgg'):
            for end_point in self.bev_end_points:
                tf.summary.histogram(end_point,
                                     self.bev_end_points[end_point])

        with tf.variable_scope('img_vgg'):
            for end_point in self.img_end_points:
                tf.summary.histogram(end_point,
                                     self.img_end_points[end_point])

    with tf.variable_scope('histograms_rpn'):
        with tf.variable_scope('anchor_predictor'):
            fc_layers = [
                cls_fc6, cls_fc7, cls_fc8, objectness, reg_fc6, reg_fc7,
                reg_fc8, offsets
            ]
            for fc_layer in fc_layers:
                # fix the name to avoid tf warnings
                tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                     fc_layer)

    # Return the proposals
    with tf.variable_scope('proposals'):
        anchors = self.placeholders[self.PL_ANCHORS]

        # Decode anchor regression offsets
        with tf.variable_scope('decoding'):
            regressed_anchors = anchor_encoder.offset_to_anchor(
                anchors, offsets)

        with tf.variable_scope('bev_projection'):
            _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                regressed_anchors, self._bev_extents)

        with tf.variable_scope('softmax'):
            objectness_softmax = tf.nn.softmax(objectness)

        with tf.variable_scope('nms'):
            # Column 1 of the softmax is used as the NMS score.
            objectness_scores = objectness_softmax[:, 1]

            # Do NMS on regressed anchors
            top_indices = tf.image.non_max_suppression(
                bev_proposal_boxes_norm,
                objectness_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_thresh)

            top_anchors = tf.gather(regressed_anchors, top_indices)
            top_objectness_softmax = tf.gather(objectness_scores,
                                               top_indices)
            # top_offsets = tf.gather(offsets, top_indices)
            # top_objectness = tf.gather(objectness, top_indices)

    # Get mini batch
    all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
    all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
    all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

    with tf.variable_scope('mini_batch'):
        mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
        mini_batch_mask, _ = \
            mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

    # ROI summary images
    rpn_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
    with tf.variable_scope('bev_rpn_rois'):
        mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                              mini_batch_mask)
        # All-zero box indices: every crop is taken from image 0.
        mb_bev_box_indices = tf.zeros_like(tf.boolean_mask(
            all_classes_gt, mini_batch_mask),
                                           dtype=tf.int32)

        # Show the ROIs of the BEV input density map
        # for the mini batch anchors
        bev_input_rois = tf.image.crop_and_resize(self._bev_preprocessed,
                                                  mb_bev_anchors_norm,
                                                  mb_bev_box_indices,
                                                  (32, 32))

        bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                self._bev_depth,
                                                axis=3)
        # Only the last channel slice is written to the image summary.
        tf.summary.image('bev_rpn_rois',
                         bev_input_roi_summary_images[-1],
                         max_outputs=rpn_mini_batch_size)

    with tf.variable_scope('img_rpn_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                              mini_batch_mask)
        mb_img_box_indices = tf.zeros_like(tf.boolean_mask(
            all_classes_gt, mini_batch_mask),
                                           dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(self._img_preprocessed,
                                                  mb_img_anchors_norm,
                                                  mb_img_box_indices,
                                                  (32, 32))

        tf.summary.image('img_rpn_rois',
                         img_input_rois,
                         max_outputs=rpn_mini_batch_size)

    # Ground Truth Tensors
    with tf.variable_scope('one_hot_classes'):

        # Anchor classification ground truth
        # Object / Not Object
        min_pos_iou = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

        # Class 1 where the anchor IoU reaches the positive threshold, else 0.
        objectness_classes_gt = tf.cast(tf.greater_equal(
            all_ious_gt, min_pos_iou),
                                        dtype=tf.int32)
        # One-hot targets with label smoothing applied to both entries.
        objectness_gt = tf.one_hot(
            objectness_classes_gt,
            depth=2,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=self._config.label_smoothing_epsilon)

    # Mask predictions for mini batch
    with tf.variable_scope('prediction_mini_batch'):
        objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
        offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

    with tf.variable_scope('ground_truth_mini_batch'):
        objectness_gt_masked = tf.boolean_mask(objectness_gt,
                                               mini_batch_mask)
        offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                            mini_batch_mask)

    # Specify the tensors to evaluate
    predictions = dict()

    # Temporary predictions for debugging
    # predictions['anchor_ious'] = anchor_ious
    # predictions['anchor_offsets'] = all_offsets_gt

    if self._train_val_test in ['train', 'val']:
        # All anchors
        predictions[self.PRED_ANCHORS] = anchors

        # Mini-batch masks
        predictions[self.PRED_MB_MASK] = mini_batch_mask
        # Mini-batch predictions
        predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
        predictions[self.PRED_MB_OFFSETS] = offsets_masked

        # Mini batch ground truth
        predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
        predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

        # Proposals after nms
        predictions[self.PRED_TOP_INDICES] = top_indices
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    else:
        # self._train_val_test == 'test'
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    return predictions
def __init__(self,
             embedding_mat,
             non_static,
             hidden_unit,
             sequence_length,
             max_pool_size,
             num_classes,
             embedding_size,
             filter_sizes,
             num_filters,
             l2_reg_lambda=0.0):
    """Build the bidirectional-LSTM classification graph.

    The graph embeds ``input_x``, runs a forward and a backward LSTM of
    ``int(hidden_unit / 2)`` units each over the embedded sequence and
    classifies the RNN output selected by ``real_len``.

    Args:
        embedding_mat: Initial embedding matrix handed to ``tf.constant`` /
            ``tf.Variable``.
        non_static: If true the embedding is a trainable ``tf.Variable``,
            otherwise a ``tf.constant``.
        hidden_unit: Total RNN output width; split in half per direction.
            NOTE(review): the selection mask and the output weights use
            ``hidden_unit`` directly, so this presumably has to be even.
        sequence_length: Static length of every input sequence.
        max_pool_size: Unused by this graph; kept for signature parity.
        num_classes: Number of output classes.
        embedding_size: Only used for the shape of the ``pad`` placeholder.
        filter_sizes: Unused by this graph; kept for signature parity.
        num_filters: Unused by this graph; kept for signature parity.
        l2_reg_lambda: Weight of the L2 penalty on the output layer.
    """
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                  name='input_x')
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name='input_y')
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name='dropout_keep_prob')
    self.batch_size = tf.placeholder(tf.int32)
    # Not consumed by this graph; still created so the object keeps the
    # same `pad` attribute.
    self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1],
                              name='pad')
    self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

    l2_loss = tf.constant(0.0)

    with tf.device('/cpu:0'), tf.name_scope('embedding'):
        if not non_static:
            embedding_W = tf.constant(embedding_mat, name='W')
        else:
            embedding_W = tf.Variable(embedding_mat, name='W')
        self.embedded_chars = tf.nn.embedding_lookup(embedding_W,
                                                     self.input_x)

    # No pooling here: the RNN sees one step per input position.
    reduced = sequence_length

    # Forward and backward cells. The earlier full-width LSTMCell that was
    # created and immediately overwritten has been dropped.
    lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=int(hidden_unit / 2))
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_cell, output_keep_prob=self.dropout_keep_prob)
    lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units=int(hidden_unit / 2))
    lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(
        lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)

    self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
    self._initial_state_bw = lstm_bw_cell.zero_state(
        self.batch_size, tf.float32)

    # One [batch, embedding] tensor per time step.
    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, reduced, self.embedded_chars)
    ]
    outputs, _, _ = tf.nn.bidirectional_rnn(
        lstm_cell,
        lstm_bw_cell,
        inputs,
        initial_state_fw=self._initial_state,
        initial_state_bw=self._initial_state_bw,
        sequence_length=self.real_len)

    # Select, per example, outputs[real_len - 1]: step i replaces the
    # running output only while real_len >= i + 1.
    output = outputs[0]
    with tf.variable_scope('Output'):
        tf.get_variable_scope().reuse_variables()
        one = tf.ones([1, hidden_unit], tf.float32)
        for i in range(1, len(outputs)):
            ind = self.real_len < (i + 1)
            ind = tf.to_float(ind)
            ind = tf.expand_dims(ind, -1)
            mat = tf.matmul(ind, one)
            output = tf.add(tf.mul(output, mat),
                            tf.mul(outputs[i], 1.0 - mat))

    with tf.name_scope('output'):
        self.W = tf.Variable(tf.truncated_normal(
            [hidden_unit, num_classes], stddev=0.1),
                             name='W')
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')
        # Regularise the output layer itself. The previous code passed the
        # stale local `W` (the embedding matrix) here.
        l2_loss += tf.nn.l2_loss(self.W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(output, self.W, b, name='scores')
        self.predictions = tf.argmax(self.scores, 1, name='predictions')

    with tf.name_scope('loss'):
        # Positional (logits, labels) order of the pre-1.0 API used
        # throughout this block.
        losses = tf.nn.softmax_cross_entropy_with_logits(
            self.scores, self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    with tf.name_scope('accuracy'):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name='accuracy')

    with tf.name_scope('num_correct'):
        correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.num_correct = tf.reduce_sum(tf.cast(correct, 'float'))
def __call__(self, x, prev_state):
    """Run one step of the NTM cell.

    Args:
        x: Input tensor for this step; concatenated with the previous read
            vectors along axis 1.
        prev_state: Previous state exposing ``controller_state``,
            ``read_vector_list``, ``w_list`` and ``M``.

    Returns:
        Tuple ``(NTM_output, NTMControllerState)`` with the clipped output
        of this step and the new controller state, read vectors, head
        weightings and memory.

    Side effects:
        Increments ``self.step``; the "o2p" and "o2o" dense layers reuse
        their variables once ``self.step > 0`` or when ``self.reuse`` is
        set.
    """
    prev_read_vector_list = prev_state.read_vector_list

    controller_input = tf.concat([x] + prev_read_vector_list, axis=1)
    with tf.compat.v1.variable_scope('controller', reuse=self.reuse):
        controller_output, controller_state = self.controller(
            controller_input, prev_state.controller_state)

    # Per-head layout: key (memory_vector_dim), beta (1), g (1),
    # shift distribution (2 * shift_range + 1), gamma (1).
    num_parameters_per_head = (self.memory_vector_dim + 1 + 1 +
                               (self.shift_range * 2 + 1) + 1)
    num_heads = self.read_head_num + self.write_head_num
    # Each write head additionally gets an erase and an add vector.
    total_parameter_num = (num_parameters_per_head * num_heads +
                           self.memory_vector_dim * 2 * self.write_head_num)
    with tf.compat.v1.variable_scope("o2p",
                                     reuse=(self.step > 0) or self.reuse):
        parameters = tf.compat.v1.layers.dense(
            controller_output,
            total_parameter_num,
            activation=None,
            kernel_initializer=self.o2p_initializer)
        parameters = tf.clip_by_value(parameters, -self.clip_value,
                                      self.clip_value)
    head_parameter_list = tf.split(
        parameters[:, :num_parameters_per_head * num_heads],
        num_heads,
        axis=1)
    erase_add_list = tf.split(
        parameters[:, num_parameters_per_head * num_heads:],
        2 * self.write_head_num,
        axis=1)

    prev_w_list = prev_state.w_list
    prev_M = prev_state.M
    w_list = []
    for i, head_parameter in enumerate(head_parameter_list):
        # Squash each raw parameter slice into its valid range.
        k = tf.tanh(head_parameter[:, 0:self.memory_vector_dim])
        beta = tf.nn.softplus(head_parameter[:, self.memory_vector_dim])
        g = tf.sigmoid(head_parameter[:, self.memory_vector_dim + 1])
        s = tf.nn.softmax(
            head_parameter[:, self.memory_vector_dim +
                           2:self.memory_vector_dim + 2 +
                           (self.shift_range * 2 + 1)])
        gamma = tf.nn.softplus(head_parameter[:, -1]) + 1
        with tf.compat.v1.variable_scope('addressing_head_%d' % i):
            w = self.addressing(k, beta, g, s, gamma, prev_M,
                                prev_w_list[i])
        w_list.append(w)

    # Reading (Sec 3.1)
    # The first read_head_num weightings belong to the read heads. Each read
    # vector is the weighting-weighted sum over axis 1 of the previous memory
    # (assumes prev_M is (batch, memory_size, memory_vector_dim) — TODO
    # confirm).
    read_w_list = w_list[:self.read_head_num]
    read_vector_list = [
        tf.reduce_sum(tf.expand_dims(read_w, axis=2) * prev_M, axis=1)
        for read_w in read_w_list
    ]

    # Writing (Sec 3.2)
    write_w_list = w_list[self.read_head_num:]
    M = prev_M
    for i in range(self.write_head_num):
        w = tf.expand_dims(write_w_list[i], axis=2)
        erase_vector = tf.expand_dims(tf.sigmoid(erase_add_list[i * 2]),
                                      axis=1)
        add_vector = tf.expand_dims(tf.tanh(erase_add_list[i * 2 + 1]),
                                    axis=1)
        # A broadcast scalar 1.0 replaces tf.ones(M.get_shape()): the value
        # is identical, but it no longer requires a fully defined static
        # memory shape (e.g. an unknown batch size) or a float32 memory.
        M = M * (1.0 - tf.matmul(w, erase_vector)) + tf.matmul(
            w, add_vector)

    # Default the output width to the input width when none is configured.
    if not self.output_dim:
        output_dim = x.get_shape()[1]
    else:
        output_dim = self.output_dim
    with tf.compat.v1.variable_scope("o2o",
                                     reuse=(self.step > 0) or self.reuse):
        NTM_output = tf.compat.v1.layers.dense(
            tf.concat([controller_output] + read_vector_list, axis=1),
            output_dim,
            activation=None,
            kernel_initializer=self.o2o_initializer)
        NTM_output = tf.clip_by_value(NTM_output, -self.clip_value,
                                      self.clip_value)

    self.step += 1
    return NTM_output, NTMControllerState(controller_state=controller_state,
                                          read_vector_list=read_vector_list,
                                          w_list=w_list,
                                          M=M)
def _init_neural_network(self):
    """Initialize the RNNLM network.

    Builds, inside the variable scope ``self.scope_name``: the input and
    target placeholders, the (optionally stacked) RNN cell, the embedding
    wrapper, the unrolled RNN, the output projection, the loss/cost and the
    clipped-gradient training op. Finally creates ``self.session``.

    ``self.cell_type`` selects the cell: a ``'gru'`` prefix gives a GRU,
    anything else a basic LSTM; a trailing ``'/N'`` (one digit) stacks N
    copies. ``self.optimizer_type`` selects ``'sgd'``, ``'adagrad'`` or,
    for any other value, Adam.
    """
    with tf.variable_scope(self.scope_name):
        # TODO dropout

        # I/O placeholders
        self._inputs = tf.placeholder(tf.int32, [None, self.max_sent_len],
                                      name='inputs')
        self._targets = tf.placeholder(tf.int32, [None, self.max_sent_len],
                                       name='targets')

        # RNN cell type
        if self.cell_type.startswith('gru'):
            self._cell = tf.nn.rnn_cell.GRUCell(self.emb_size)
        else:
            self._cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_size)
        # The layer-count suffix sits at the END of cell_type, so it needs
        # re.search; re.match anchors at the start and could never match
        # e.g. 'lstm/2'. The stacked cell is built from self._cell (the
        # attribute assigned above), not a non-existent self.cell.
        if re.search(r'/[0-9]$', self.cell_type):
            self._cell = tf.nn.rnn_cell.MultiRNNCell(
                [self._cell] * int(self.cell_type[-1]))
        self._initial_state = self._cell.zero_state(
            tf.shape(self._inputs)[0], tf.float32)

        # embeddings
        emb_cell = tf.nn.rnn_cell.EmbeddingWrapper(self._cell,
                                                   self.vocab_size)
        # RNN encoder: one [batch] tensor of token ids per time step
        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, self.max_sent_len, self._inputs)
        ]
        outputs, states = tf.nn.rnn(emb_cell,
                                    inputs,
                                    initial_state=self._initial_state)

        # output layer
        output = tf.reshape(tf.concat(1, outputs), [-1, self.emb_size])
        self._logits = (tf.matmul(
            output, tf.get_variable("W", [self.emb_size, self.vocab_size]))
                        + tf.get_variable("b", [self.vocab_size]))

        # cost
        targets_1d = tf.reshape(self._targets, [-1])
        self._loss = tf.nn.seq2seq.sequence_loss_by_example(
            [self._logits], [targets_1d],
            [tf.ones_like(targets_1d, dtype=tf.float32)], self.vocab_size)
        self._cost = tf.reduce_mean(self._loss)

        # optimizer
        self._learning_rate = tf.placeholder(tf.float32,
                                             name="learning_rate")
        # One if/elif/else chain: with two separate `if`s the 'sgd' choice
        # was always overwritten by the trailing `else` (Adam).
        if self.optimizer_type == 'sgd':
            opt = tf.train.GradientDescentOptimizer(self._learning_rate)
        elif self.optimizer_type == 'adagrad':
            opt = tf.train.AdagradOptimizer(self._learning_rate)
        else:
            opt = tf.train.AdamOptimizer(self._learning_rate)

        # gradient clipping
        grads_tvars = opt.compute_gradients(self._loss,
                                            tf.trainable_variables())
        grads, _ = tf.clip_by_global_norm([g for g, _ in grads_tvars],
                                          self.max_grad_norm)
        self._train_func = opt.apply_gradients(
            zip(grads, [v for _, v in grads_tvars]))

    # initialize TF session
    session_config = None
    if self.max_cores:
        # Cap both thread pools at the configured number of cores.
        session_config = tf.ConfigProto(
            inter_op_parallelism_threads=self.max_cores,
            intra_op_parallelism_threads=self.max_cores)
    self.session = tf.Session(config=session_config)
def _set_up_input_pls(self):
    """Create the model's input placeholders.

    Every placeholder is created through ``self._add_placeholder`` under
    one of the ``self.PL_*`` names. Besides the placeholders, the method
    stores the batched and preprocessed BEV / image inputs and the two
    normalized-anchor placeholders as attributes, and registers image
    summaries for both inputs.

    NOTE(review): the nesting of the anchor-projection and sample-info
    scopes inside 'pl_anchors' was reconstructed from flattened source;
    confirm against the original layout.
    """
    # BEV input shape: the BEV pixel size with the BEV depth appended
    bev_shape = np.append(self._bev_pixel_size, self._bev_depth)

    with tf.variable_scope('bev_input'):
        # BEV image input, to be supplied through feed_dict
        bev_pl = self._add_placeholder(tf.float32, bev_shape,
                                       self.PL_BEV_INPUT)

        # Add a leading batch axis before preprocessing
        self._bev_input_batches = tf.expand_dims(bev_pl, axis=0)
        bev_extractor = self._bev_feature_extractor
        self._bev_preprocessed = bev_extractor.preprocess_input(
            self._bev_input_batches, self._bev_pixel_size)

        # Summary images: split the placeholder along axis 2
        bev_slices = tf.split(bev_pl, self._bev_depth, axis=2)
        tf.summary.image("bev_maps", bev_slices,
                         max_outputs=self._bev_depth)

    with tf.variable_scope('img_input'):
        # Height and width are left unspecified (variable-size images)
        img_pl = self._add_placeholder(
            tf.float32, [None, None, self._img_depth], self.PL_IMG_INPUT)

        self._img_input_batches = tf.expand_dims(img_pl, axis=0)
        img_extractor = self._img_feature_extractor
        self._img_preprocessed = img_extractor.preprocess_input(
            self._img_input_batches, self._img_pixel_size)

        # Summary image
        tf.summary.image("rgb_image", self._img_preprocessed, max_outputs=2)

    with tf.variable_scope('pl_labels'):
        label_specs = (
            ([None, 6], self.PL_LABEL_ANCHORS),
            ([None, 7], self.PL_LABEL_BOXES_3D),
            ([None], self.PL_LABEL_CLASSES),
        )
        for pl_shape, pl_name in label_specs:
            self._add_placeholder(tf.float32, pl_shape, pl_name)

    # Placeholders for anchors
    with tf.variable_scope('pl_anchors'):
        anchor_specs = (
            ([None, 6], self.PL_ANCHORS),
            ([None], self.PL_ANCHOR_IOUS),
            ([None, 6], self.PL_ANCHOR_OFFSETS),
            ([None], self.PL_ANCHOR_CLASSES),
        )
        for pl_shape, pl_name in anchor_specs:
            self._add_placeholder(tf.float32, pl_shape, pl_name)

        with tf.variable_scope('bev_anchor_projections'):
            self._add_placeholder(tf.float32, [None, 4],
                                  self.PL_BEV_ANCHORS)
            self._bev_anchors_norm_pl = self._add_placeholder(
                tf.float32, [None, 4], self.PL_BEV_ANCHORS_NORM)

        with tf.variable_scope('img_anchor_projections'):
            self._add_placeholder(tf.float32, [None, 4],
                                  self.PL_IMG_ANCHORS)
            self._img_anchors_norm_pl = self._add_placeholder(
                tf.float32, [None, 4], self.PL_IMG_ANCHORS_NORM)

        with tf.variable_scope('sample_info'):
            # the calib matrix shape is (3 x 4)
            self._add_placeholder(tf.float32, [3, 4], self.PL_CALIB_P2)
            self._add_placeholder(tf.int32, shape=[1],
                                  name=self.PL_IMG_IDX)
            self._add_placeholder(tf.float32, [4], self.PL_GROUND_PLANE)
def __init__(self, is_training, args):
    """Build the stacked-LSTM regression graph.

    Args:
        is_training: When true, dropout is applied (if ``keep_prob`` < 1),
            progress messages are printed, and the training op and
            TensorBoard summaries are created.
        args: Mapping of hyper-parameters. Keys read here:
            ``max_group_size`` (used as batch size), ``num_steps``,
            ``num_features``, ``hidden_size``, ``keep_prob``,
            ``num_layers`` and, in training mode only, ``max_grad_norm``
            and ``optimizer`` (a callable taking the learning rate and
            returning an optimizer).

    NOTE(review): ``self.predictions``, ``self.targets`` and ``self.lr``
    are read below but never assigned here; presumably they are
    properties over the underscored attributes -- confirm on the class.
    """
    self.batch_size = batch_size = np.int32(args['max_group_size'])
    self.num_steps = num_steps = np.int32(args['num_steps'])
    self.num_features = num_features = args['num_features']
    self.dense_units = dense_units = 1
    self.hidden_size = size = np.int32(args['hidden_size'])

    # Clamp degenerate hyper-parameters instead of failing.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if self.num_steps < 1:
        print('num_steps cannot be zero -- setting to 1')
        self.num_steps = num_steps = 1
    if self.hidden_size < 10:
        print('hidden_size should not be less than 10 -- setting to 10')
        self.hidden_size = size = 10

    if is_training:
        print('Initiating input tensors of shape: {}'.format(
            (num_steps, batch_size, num_features)))

    self._input_data = inputs = tf.placeholder(
        tf.float32, [num_steps, batch_size, num_features])
    self._targets = tf.placeholder(tf.float32, [num_steps, batch_size])

    # Memory cell to use in model
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=size,
                                             forget_bias=0.0,
                                             state_is_tuple=True)
    if is_training:
        print('Memory Cell: {}'.format(type(lstm_cell)))

    # Wrap the memory cell in a dropout layer (for outputs)
    if is_training and args['keep_prob'] < 1:
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            cell=lstm_cell, output_keep_prob=args['keep_prob'])

    # Create the RNN with 'num_layers' layers
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell] * args['num_layers'], state_is_tuple=True)

    # Initialize the state -- it holds the last output, h_t, as well as the
    # memory state, c_t, for every layer. The cells are built with
    # state_is_tuple=True, so the earlier description of one concatenated
    # [batch_size, num_units x 2] tensor does not match this configuration.
    self._initial_state = stacked_cell.zero_state(
        batch_size=tf.constant(batch_size), dtype=tf.float32)

    # Split the inputs (by timestep)
    inputs = [tf.squeeze(step_input, [0])
              for step_input in tf.split(0, num_steps, inputs)]

    # Computes dropout for inputs
    if is_training and args['keep_prob'] < 1:
        inputs = [tf.nn.dropout(x, args['keep_prob']) for x in inputs]

    # Run inputs through the RNN
    outputs, state = tf.nn.rnn(stacked_cell, inputs,
                               initial_state=self._initial_state)

    # Re-joins all output tensors (from each timestep)
    output = tf.reshape(tf.concat(1, outputs), shape=[-1, size])

    # Add a fully-connected layer
    self.dense_w = dense_w = tf.get_variable('dense_w',
                                             shape=[size, dense_units])
    self.dense_b = dense_b = tf.get_variable('dense_b', shape=[dense_units])

    # Feed the output from the RNN to the fully-connected layer
    self._predictions = predictions = tf.matmul(output, dense_w) + dense_b
    self._predictions = predictions = tf.reshape(
        self.predictions, shape=[num_steps, batch_size])

    # Compute the R^2
    numerator = tf.reduce_sum(
        tf.square(tf.sub(self.targets, self.predictions)))
    denominator = tf.reduce_sum(
        tf.square(tf.sub(self.targets, tf.reduce_mean(self.targets))))
    self.r2 = r2 = tf.sub(1.0, tf.div(numerator, denominator))

    # MSE cost function
    self._cost = cost = tf.reduce_mean(
        tf.square(tf.sub(self.targets, self.predictions)))
    self._final_state = state

    # Variable for state (for when saving model)
    self.save_state = tf.Variable(
        tf.zeros([args['num_layers'], 2, batch_size, size]))
    # NOTE(review): the op returned by assign() is discarded, so nothing in
    # this method ever runs it -- confirm whether callers expect save_state
    # to track the final state.
    self.save_state.assign(state)

    if is_training:
        self._lr = tf.Variable(0.0, trainable=False)

        # Compute the gradients, clipped by global norm
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(t_list=tf.gradients(cost, tvars),
                                          clip_norm=args['max_grad_norm'])

        # Adjust the parameters based on optimizer and gradients.
        # zip() is materialised so the pairs are a real sequence on
        # Python 3 as well.
        optimizer = args['optimizer'](self.lr)
        self._train_op = optimizer.apply_gradients(
            grads_and_vars=list(zip(grads, tvars)))

        # Summaries for Tensorboard (collected by merge_all_summaries)
        tf.scalar_summary('mean squared error', cost)
        tf.scalar_summary('r-squared', r2)
        tf.histogram_summary('states', state)
        tf.histogram_summary('predictions', predictions)
        self.summary = tf.merge_all_summaries()
    else:
        # Ignore this -- put here so errors are prevented when running
        # model not in training mode
        self.summary = predictions
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

    This will only be called once in the life of a training model, because
    after the graph is created the model will be restored from a meta graph
    file rather than being recreated.

    One "tower" is built per selected GPU (a single CPU tower when no GPU
    is found). The input batch is split evenly across towers, per-tower
    gradients are combined with ``utils.combine_gradients`` and applied by
    one optimizer. The resulting tensors and ops are published through
    graph collections ("global_step", "loss", "predictions",
    "input_batch_raw", "input_batch", "num_frames", "labels", "train_op").

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit
        from BaseModel.
      train_data_pattern: glob path to the training data files.
      label_loss_fn: What kind of loss to apply to the model. It should
        inherit from BaseLoss.
      batch_size: How many examples to process at a time (per tower).
      base_learning_rate: What learning rate to initialize the optimizer
        with.
      learning_rate_decay_examples: Passed to
        ``tf.train.exponential_decay`` as the decay interval; the step
        fed to it is ``global_step * batch_size * num_towers``.
      learning_rate_decay: Decay rate passed to
        ``tf.train.exponential_decay`` (staircase mode).
      optimizer_class: Which optimization algorithm to use.
      clip_gradient_norm: Magnitude of the gradient to clip to. Values
        <= 0 disable clipping.
      regularization_penalty: How much weight to give the regularization
        loss compared to the label loss.
      num_readers: How many threads to use for I/O operations.
      num_epochs: How many passes to make over the data. 'None' means an
        unlimited number of passes.
    """
    global_step = tf.Variable(0, trainable=False, name="global_step")

    # Pick at most FLAGS.num_gpu of the locally visible GPUs
    gpus = [
        device.name
        for device in device_lib.list_local_devices()
        if device.device_type == "GPU"
    ][:FLAGS.num_gpu]
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers, device_string = num_gpus, "/gpu:%d"
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers, device_string = 1, "/cpu:%d"

    learning_rate = tf.train.exponential_decay(
        base_learning_rate,
        global_step * batch_size * num_towers,
        learning_rate_decay_examples,
        learning_rate_decay,
        staircase=True)
    tf.summary.scalar("learning_rate", learning_rate)

    optimizer = optimizer_class(learning_rate)

    input_data_dict = get_input_data_tensors(
        reader,
        train_data_pattern,
        batch_size=batch_size * num_towers,
        num_readers=num_readers,
        num_epochs=num_epochs)
    model_input_raw = input_data_dict["video_matrix"]
    labels_batch = input_data_dict["labels"]
    num_frames = input_data_dict["num_frames"]
    print("model_input_shape, ", model_input_raw.shape)
    tf.summary.histogram("model/input_raw", model_input_raw)

    # L2-normalize along the last axis
    feature_dim = len(model_input_raw.get_shape()) - 1
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    # One equal slice of the batch per tower
    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)

    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []

    # Model variables live on the CPU unless exactly one GPU is in use
    variable_device = "/gpu:0" if num_gpus == 1 else "/cpu:0"

    for tower in range(num_towers):
        # For some reason these 'with' statements can't be combined onto
        # the same line. They have to be nested.
        with tf.device(device_string % tower):
            with (tf.variable_scope(("tower"),
                                    reuse=True if tower > 0 else None)):
                with (slim.arg_scope([slim.model_variable, slim.variable],
                                     device=variable_device)):
                    result = model.create_model(
                        tower_inputs[tower],
                        num_frames=tower_num_frames[tower],
                        vocab_size=reader.num_classes,
                        labels=tower_labels[tower])
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    # The model may supply its own losses; otherwise fall
                    # back to label_loss_fn and a zero regularization term
                    if "loss" in result:
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(
                            predictions, tower_labels[tower])

                    if "regularization_loss" in result:
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    collected_reg = tf.losses.get_regularization_losses()
                    if collected_reg:
                        reg_loss += tf.add_n(collected_reg)

                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving average updates in
                    # batch normalization) as a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result:
                        update_ops.extend(result["update_ops"])
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)

                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = (regularization_penalty * reg_loss +
                                  label_loss)
                    tower_gradients.append(
                        optimizer.compute_gradients(
                            final_loss, colocate_gradients_with_ops=False))

    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)

    merged_gradients = utils.combine_gradients(tower_gradients)
    if clip_gradient_norm > 0:
        with tf.name_scope("clip_grads"):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    # Publish everything the training loop needs via graph collections
    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
def get_q_values_op(self, state, past_a, seq_len, h_state, scope,
                    reuse=False):
    """
    Returns Q values for all actions

    The state is reshaped into 4 equal slices per time step, each slice is
    concatenated with the one-hot previous action, passed through two
    fully-connected layers and max-pooled over the 4 slices. The result is
    fed to an LSTM, whose output is split into two halves feeding the
    advantage and value heads (dueling architecture).

    Args:
        state: (tf tensor)
            shape = (batch_size, seq_len, img_w, img_h, nchannel)
        past_a: (tf tensor) shape = (batch_size*seq_len,)
        seq_len: (tf tensor) shape = (batch_size,)
        h_state: initial recurrent state passed to ``tf.nn.dynamic_rnn``
            (documented by the original author as shape
            (batch_size, h_size) -- TODO confirm the expected structure
            for an LSTMCell)
        scope: (string) scope name, that specifies if target network
            or not
        reuse: (bool) reuse of variables in the scope

    Returns:
        out: (tf tensor) of shape = (batch_size * seq_len, num_actions)
        h_state_out: final recurrent state returned by
            ``tf.nn.dynamic_rnn``
    """
    num_actions = 2
    h_size = self.config.h_size
    max_seq_len = tf.shape(state)[1]
    state_shape = [4 * 3, 3, len(self.env.state.xmap.item_class_id) + 2]

    # One-hot previous action, repeated for each of the 4 state slices
    past_a = tf.reshape(tf.one_hot(past_a, num_actions),
                        shape=(-1, max_seq_len, 1, num_actions))
    past_a = tf.tile(past_a, multiples=[1, 1, 4, 1])

    # Flatten the state into 4 equally sized slices per time step
    slice_size = np.int32(
        state_shape[0] * state_shape[1] * state_shape[2] / 4)
    out = tf.reshape(state, shape=(-1, max_seq_len, 4, slice_size))

    # Honour the caller's `reuse` flag; it used to be hard-coded to False,
    # which made the parameter a no-op.
    # NOTE(review): the extent of this scope (through the value/advantage
    # heads) was reconstructed from flattened source -- confirm.
    with tf.variable_scope(scope, reuse=reuse):
        #### recurrent
        out = tf.concat([out, past_a], axis=3)
        out = layers.fully_connected(layers.fully_connected(out, 200), 100)
        out = tf.reduce_max(out, axis=2)
        lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=h_size)
        out, h_state_out = tf.nn.dynamic_rnn(inputs=out,
                                             cell=lstm_cell,
                                             sequence_length=seq_len,
                                             dtype=tf.float32,
                                             initial_state=h_state)
        out = tf.reshape(out, shape=[-1, h_size])

        #### feed forward (disabled alternative to the recurrent path)
        # out = layers.fully_connected(layers.fully_connected(out, 200), 100)
        # out = tf.reduce_max(out, axis=2)
        # out = tf.reshape(out, shape=[-1,100])
        # h_state_out = h_state

        # Dueling heads: each stream gets half of the features
        streamA, streamV = tf.split(out, 2, axis=1)
        advantage = layers.fully_connected(
            streamA,
            num_actions,
            activation_fn=None,
            weights_initializer=layers.xavier_initializer(),
            biases_initializer=tf.zeros_initializer())
        value = layers.fully_connected(
            streamV,
            1,
            activation_fn=None,
            weights_initializer=layers.xavier_initializer(),
            biases_initializer=tf.zeros_initializer())
        # Q = V + (A - mean(A))
        out = value + tf.subtract(
            advantage, tf.reduce_mean(advantage, axis=1, keep_dims=True))

    return out, h_state_out
def hsplit(x, n_splits):
    """Split ``x`` along axis 1.

    Thin wrapper that forwards ``n_splits`` to ``_tf.split`` as
    ``num_or_size_splits`` and returns whatever that call returns.
    """
    pieces = _tf.split(x, num_or_size_splits=n_splits, axis=1)
    return pieces
def _module_fn():
    """Function building the module.

    Declares the 'input' and 'observations' placeholders, builds a 3-D
    convolutional network whose final layer parameterizes a mixture of
    discretized logistic distributions, and registers a hub signature
    exposing a sample and the log-likelihood of the observations.

    Relies on ``nchannels``, ``n_y``, ``n_mixture``, ``dropout`` and
    ``is_training`` from the enclosing scope.
    """
    features_pl = tf.placeholder(
        tf.float32,
        shape=[None, None, None, None, nchannels],
        name='input')
    labels_pl = tf.placeholder(
        tf.float32,
        shape=[None, None, None, None, n_y],
        name='observations')

    # Builds the neural network
    hidden = slim.conv3d(features_pl, 16, 5,
                         activation_fn=tf.nn.leaky_relu,
                         padding='valid')
    #net = wide_resnet(feature_layer, 8, activation_fn=tf.nn.leaky_relu, is_training=is_training)
    for n_filters in (16, 32, 32):
        hidden = wide_resnet(hidden, n_filters,
                             activation_fn=tf.nn.leaky_relu,
                             keep_prob=dropout,
                             is_training=is_training)
    hidden = slim.conv3d(hidden, 32, 3, activation_fn=tf.nn.tanh)

    # Define the probabilistic layer: 3 parameters per mixture component
    # and per output channel
    hidden = slim.conv3d(hidden, n_mixture * 3 * n_y, 1, activation_fn=None)
    # The side length is read from axis 1 of the observations and reused
    # for all three spatial axes
    side = tf.shape(labels_pl)[1]
    hidden = tf.reshape(hidden, [-1, side, side, side, n_y, n_mixture * 3])
    # net = tf.reshape(net, [None, None, None, None, n_y, n_mixture*3])
    loc, raw_scale, logits = tf.split(hidden, num_or_size_splits=3, axis=-1)
    scale = tf.nn.softplus(raw_scale)

    # Form mixture of discretized logistic distributions. Note we shift the
    # logistic distribution by -0.5. This lets the quantization capture
    # "rounding" intervals, `(x-0.5, x+0.5]`, and not "ceiling" intervals,
    # `(x-1, x]`.
    shifted_logistic = tfd.TransformedDistribution(
        distribution=tfd.Logistic(loc=loc, scale=scale),
        bijector=tfb.AffineScalar(shift=-0.5))
    quantized_logistic = tfd.QuantizedDistribution(
        distribution=shifted_logistic,
        low=0.,
        high=2.**3 - 1)
    mixture = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(logits=logits),
        components_distribution=quantized_logistic)

    # Sampling output and log-likelihood of the observations
    sample = tf.squeeze(mixture.sample())
    loglik = mixture.log_prob(labels_pl)

    signature_inputs = {'features': features_pl, 'labels': labels_pl}
    signature_outputs = {'sample': sample, 'loglikelihood': loglik}
    hub.add_signature(inputs=signature_inputs, outputs=signature_outputs)
def _create_svgd_update(self):
    """Create a minimization operation for policy update (SVGD).

    Samples ``self._kernel_n_particles`` actions per observation, splits
    them into a fixed and an updated set, computes the Stein variational
    gradient for the updated set and back-propagates it into the policy
    parameters through a surrogate loss. When ``self._train_policy`` is
    set, the resulting Adam training op is appended to
    ``self._training_ops``.
    """
    actions = self.policy.actions_for(
        observations=self._observations_ph,
        n_action_samples=self._kernel_n_particles,
        reuse=True)
    assert_shape(actions,
                 [None, self._kernel_n_particles, self._action_dim])

    # SVGD requires computing two empirical expectations over actions
    # (see Appendix C1.1.). To that end, we first sample a single set of
    # actions, and later split them into two sets: `fixed_actions` are used
    # to evaluate the expectation indexed by `j` and `updated_actions`
    # the expectation indexed by `i`.
    n_updated_actions = int(
        self._kernel_n_particles * self._kernel_update_ratio)
    n_fixed_actions = self._kernel_n_particles - n_updated_actions

    fixed_actions, updated_actions = tf.split(
        actions, [n_fixed_actions, n_updated_actions], axis=1)
    fixed_actions = tf.stop_gradient(fixed_actions)
    assert_shape(fixed_actions, [None, n_fixed_actions, self._action_dim])
    assert_shape(updated_actions,
                 [None, n_updated_actions, self._action_dim])

    svgd_target_values = self.qf.output_for(
        self._observations_ph[:, None, :], fixed_actions, reuse=True)

    # Target log-density. Q_soft in Equation 13:
    squash_correction = tf.reduce_sum(
        tf.log(1 - fixed_actions**2 + EPS), axis=-1)
    log_p = svgd_target_values + squash_correction

    grad_log_p = tf.gradients(log_p, fixed_actions)[0]
    grad_log_p = tf.expand_dims(grad_log_p, axis=2)
    grad_log_p = tf.stop_gradient(grad_log_p)
    assert_shape(grad_log_p, [None, n_fixed_actions, 1, self._action_dim])

    kernel_dict = self._kernel_fn(xs=fixed_actions, ys=updated_actions)

    # Kernel function in Equation 13.
    # `axis=` replaces the deprecated `dim=` keyword, matching the other
    # expand_dims call in this method.
    kappa = tf.expand_dims(kernel_dict["output"], axis=3)
    assert_shape(kappa, [None, n_fixed_actions, n_updated_actions, 1])

    # Stein Variational Gradient in Equation 13 (mean over the fixed
    # particles; `axis=` replaces the deprecated `reduction_indices=`):
    action_gradients = tf.reduce_mean(
        kappa * grad_log_p + kernel_dict["gradient"], axis=1)
    assert_shape(action_gradients,
                 [None, n_updated_actions, self._action_dim])

    # Propagate the gradient through the policy network (Equation 14).
    policy_params = self.policy.get_params_internal()
    gradients = tf.gradients(
        updated_actions, policy_params, grad_ys=action_gradients)

    # Surrogate whose gradient w.r.t. each parameter equals the
    # (stopped) SVGD gradient computed above
    surrogate_loss = tf.reduce_sum([
        tf.reduce_sum(w * tf.stop_gradient(g))
        for w, g in zip(policy_params, gradients)
    ])

    if self._train_policy:
        optimizer = tf.train.AdamOptimizer(self._policy_lr)
        # The loss is negated so that minimizing it ascends the surrogate
        svgd_training_op = optimizer.minimize(
            loss=-surrogate_loss, var_list=policy_params)
        self._training_ops.append(svgd_training_op)
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    This function reconstructs the "second half" of the PNASNet network
    after the part defined in `_extract_proposal_features`.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width,
        depth] representing the feature map cropped to each proposal.
      scope: A scope name (unused; deleted immediately).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    del scope

    # Number of used stem cells.
    num_stem_cells = 2
    # This corresponds to the cell number just past 'Cell_7' used by
    # _extract_proposal_features().
    start_cell_num = 8
    true_cell_num = start_cell_num + num_stem_cells

    # Note that we always feed into 2 layers of equal depth
    # where the first N channels corresponds to previous hidden layer
    # and the second N channels correspond to the final hidden layer.
    hidden_previous, hidden = tf.split(proposal_feature_maps, 2, axis=3)

    # Note that what follows is largely a copy of build_pnasnet_large()
    # within pnasnet.py. We are copying to minimize code pollution in slim.

    # TODO(shlens,skornblith): Determine the appropriate drop path schedule.
    # For now the schedule is the default (1.0->0.7 over 250,000 train
    # steps).
    hparams = pnasnet.large_imagenet_config()
    if not self._is_training:
        hparams.set_hparam('drop_path_keep_prob', 1.0)

    # Calculate the total number of cells in the network
    total_num_cells = hparams.num_cells + num_stem_cells

    normal_cell = pnasnet.PNasNetNormalCell(hparams.num_conv_filters,
                                            hparams.drop_path_keep_prob,
                                            total_num_cells,
                                            hparams.total_training_steps)

    # Ops that receive is_training / data_format through arg scopes
    training_sensitive_ops = [slim.dropout, nasnet_utils.drop_path]
    layout_sensitive_ops = [
        slim.avg_pool2d, slim.max_pool2d, slim.conv2d, slim.batch_norm,
        slim.separable_conv2d, nasnet_utils.factorized_reduction,
        nasnet_utils.global_avg_pool, nasnet_utils.get_channel_index,
        nasnet_utils.get_channel_dim
    ]

    with arg_scope(training_sensitive_ops, is_training=self._is_training):
        with arg_scope([slim.batch_norm],
                       is_training=self._train_batch_norm):
            with arg_scope(layout_sensitive_ops,
                           data_format=hparams.data_format):
                # The PNASNet arg scope is created while the scopes above
                # are active, exactly as before
                with slim.arg_scope(pnasnet.pnasnet_large_arg_scope()):
                    net = _build_pnasnet_base(
                        hidden_previous,
                        hidden,
                        normal_cell=normal_cell,
                        hparams=hparams,
                        true_cell_num=true_cell_num,
                        start_cell_num=start_cell_num)

    return net
def build_computation_graph(self):
    """
    Build the policy, critic and PPO update ops.

    Notes on notation:

    Symbolic variables have the prefix sy_, to distinguish them from the
    numerical values that are computed later in the function

    Prefixes and suffixes:
    ob - observation
    ac - action
    _no - this tensor should have shape (batch size /n/, observation dim)
    _na - this tensor should have shape (batch size /n/, action dim)
    _n  - this tensor should have shape (batch size /n/)

    Note: batch size /n/ is defined at runtime, and until then, the shape
    for that axis is None

    ----------------------------------------------------------------------
    loss: a function of self.sy_lp_n and self.sy_adv_n that we will
    differentiate to get the policy gradient.
    """
    (self.sy_ob_no, self.sy_ac_na, self.sy_adv_n, self.sy_hidden,
     self.sy_lp_n, self.sy_fixed_lp_n) = self.define_placeholders()

    # The policy takes in an observation and produces a distribution over
    # the action space
    forward_outputs = self.policy_forward_pass(self.sy_ob_no,
                                               self.sy_hidden)
    self.policy_parameters = forward_outputs[:-1]

    # unpack mean and variance (the two halves along axis 1 of the first
    # output)
    self.policy_parameters = tf.split(self.policy_parameters[0], 2, axis=1)

    # We can sample actions from this action distribution.
    # This will be called in Agent.sample_trajectory() where we generate a
    # rollout.
    self.sy_sampled_ac = self.sample_action(self.policy_parameters)

    # We can also compute the logprob of the actions that were actually
    # taken by the policy. This is used in the loss function.
    self.sy_lp_n = self.get_log_prob(self.policy_parameters, self.sy_ac_na)

    # PPO critic update
    if self.l2reg:
        critic_reg = tf.contrib.layers.l2_regularizer(1e-3)
    else:
        critic_reg = None
    critic_output = build_critic(self.sy_ob_no,
                                 self.sy_hidden,
                                 1,
                                 'critic_network',
                                 n_layers=self.n_layers,
                                 size=self.size,
                                 gru_size=self.gru_size,
                                 recurrent=self.recurrent,
                                 regularizer=critic_reg)
    self.critic_prediction = tf.squeeze(critic_output)
    self.sy_target_n = tf.placeholder(shape=[None],
                                      name="critic_target",
                                      dtype=tf.float32)
    self.critic_loss = tf.losses.mean_squared_error(self.sy_target_n,
                                                    self.critic_prediction)
    self.critic_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope='critic_network')
    critic_optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.critic_update_op = critic_optimizer.minimize(self.critic_loss)

    # PPO actor update
    # NOTE(review): this placeholder is not referenced again within this
    # method; the loss below uses self.sy_fixed_lp_n instead.
    self.sy_fixed_log_prob_n = tf.placeholder(shape=[None],
                                              name="fixed_log_prob",
                                              dtype=tf.float32)
    self.policy_surr_loss = self.ppo_loss(self.sy_lp_n,
                                          self.sy_fixed_lp_n,
                                          self.sy_adv_n)
    self.policy_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope=self.scope)
    actor_optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.policy_update_op = minimize_and_clip(actor_optimizer,
                                              self.policy_surr_loss,
                                              var_list=self.policy_weights,
                                              clip_val=40)
def __init__(self, num_words, num_chars, num_classes, num_steps, word_len,
             embedding_matrix=None):
    """Build the character + word bidirectional LSTM feature extractor.

    Hyper-parameters are read from ``Config().model_para``. A character
    level bi-LSTM produces one vector per word (concatenated final forward
    and backward hidden states); that vector is concatenated with the word
    embedding, passed through dropout and fed to a word level bi-LSTM,
    whose outputs go through a tanh projection stored in
    ``self.final_outputs``.

    Args:
        num_words: Rows of the word embedding table (used only when
            ``embedding_matrix`` is None).
        num_chars: Rows of the character embedding table.
        num_classes: Number of classes; sizes the targets placeholder when
            ``use_pa_learning`` is set.
        num_steps: Number of word positions per sample.
        word_len: Number of character positions per word.
        embedding_matrix: Optional initial value for the trainable word
            embedding variable.

    NOTE(review): the variable scopes are laid out as siblings here; the
    flattened source does not show their original nesting -- confirm before
    restoring checkpoints by variable name.
    """
    # Parameter
    self.config = Config()
    params = self.config.model_para
    self.dropout_rate = params['dropout_rate']
    self.batch_size = params['batch_size']
    self.num_layers = params['lstm_layer_num']
    self.input_dim = params['input_dim']
    self.hidden_dim = params['hidden_dim']
    self.char_input_dim = params['char_input_dim']
    self.char_hidden_dim = params['char_hidden_dim']
    self.use_pa_learning = params['use_pa_learning']
    self.embedding_matrix = embedding_matrix

    self.word_len = word_len
    self.num_steps = num_steps
    self.num_words = num_words
    self.num_chars = num_chars
    self.num_classes = num_classes

    self.char_inputs = tf.placeholder(tf.int32, [None, self.word_len])

    with tf.variable_scope("character-based-emb"):
        # char embedding
        self.char_embedding = tf.get_variable(
            "char_emb", [self.num_chars, self.char_input_dim])
        char_emb = tf.nn.embedding_lookup(self.char_embedding,
                                          self.char_inputs)
        # Rearrange to a list with one tensor per character position
        char_emb = tf.transpose(char_emb, [1, 0, 2])
        char_emb = tf.reshape(char_emb, [-1, self.char_input_dim])
        self.char_inputs_emb = tf.split(char_emb, self.word_len, 0)

    # char forward and backward
    with tf.variable_scope("char-bi-lstm"):
        # char lstm cell
        char_cell_fw = rnn.LSTMCell(self.char_hidden_dim)
        char_cell_bw = rnn.LSTMCell(self.char_hidden_dim)

        # get the length of each word (count of entries whose sign is
        # non-zero)
        char_counts = tf.reduce_sum(tf.sign(self.char_inputs),
                                    reduction_indices=1)
        self.word_length = tf.cast(char_counts, tf.int32)

        _, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn(
            char_cell_fw,
            char_cell_bw,
            self.char_inputs_emb,
            dtype=tf.float32,
            sequence_length=self.word_length
        )
        word_repr = tf.concat([fw_state.h, bw_state.h], -1)
        self.word_lstm_last_output = tf.reshape(
            word_repr, [-1, self.num_steps, self.char_hidden_dim * 2])

    # word input
    with tf.variable_scope("word-based-emb"):
        self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
        # self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
        if self.use_pa_learning:
            target_dtype = tf.float32
            target_shape = [None, self.num_steps + 2, self.num_classes + 1]
        else:
            target_dtype = tf.int32
            target_shape = [None, self.num_steps]
        self.targets = tf.placeholder(target_dtype, target_shape)
        self.targets_transition = tf.placeholder(tf.int32, [None])
        self.keep_prob = tf.placeholder(tf.float32)

        if embedding_matrix is None:
            self.embedding = tf.get_variable(
                "word_emb", [self.num_words, self.input_dim])
        else:
            self.embedding = tf.Variable(embedding_matrix,
                                         trainable=True,
                                         name="word_emb",
                                         dtype=tf.float32)

        word_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)
        word_emb = tf.concat([word_emb, self.word_lstm_last_output], -1)
        word_emb = tf.nn.dropout(word_emb, self.keep_prob)
        # Rearrange to a list with one tensor per word position
        word_emb = tf.transpose(word_emb, [1, 0, 2])
        word_emb = tf.reshape(
            word_emb, [-1, self.input_dim + self.char_hidden_dim * 2])
        self.inputs_emb = tf.split(word_emb, self.num_steps, 0)

    # word lstm cell
    word_cell_fw = rnn.LSTMCell(self.hidden_dim)
    word_cell_bw = rnn.LSTMCell(self.hidden_dim)

    # get the length of each sample
    token_counts = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1)
    self.length = tf.cast(token_counts, tf.int32)

    # forward and backward
    with tf.variable_scope("word-bi-lstm"):
        self.outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
            word_cell_fw,
            word_cell_bw,
            self.inputs_emb,
            dtype=tf.float32,
            sequence_length=self.length
        )

    # bidirect concat
    stacked_outputs = tf.reshape(tf.concat(self.outputs, 1),
                                 [-1, self.hidden_dim * 2])
    tanh_layer_w = tf.get_variable("tanh_layer_w",
                                   [self.hidden_dim * 2, self.hidden_dim])
    tanh_layer_b = tf.get_variable("tanh_layer_b", [self.hidden_dim])
    self.final_outputs = tf.tanh(
        tf.matmul(stacked_outputs, tanh_layer_w) + tanh_layer_b)

# Disabled draft that splits the constructor above into separate steps
# (kept as found):
#
# def add_placeholders(self):
#     '''
#     char input = sen_batch * sen_len
#     '''
#     self.char_inputs = tf.placeholder(tf.int32, [None, self.word_len])
#     '''
#     word input
#     '''
#     self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
#     self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
#     self.targets_transition = tf.placeholder(tf.int32, [None])
#     self.keep_prob = tf.placeholder(tf.float32)
#
# def add_lookup_op(self):
#     with tf.variable_scope("character-based-emb"):
#         # char embedding
#         self.char_embedding = tf.get_variable("char_emb", [self.num_chars, self.char_input_dim])
#         self.char_inputs_emb = tf.nn.embedding_lookup(self.char_embedding, self.char_inputs)
#     with tf.variable_scope("word-based-emb"):
#         if self.embedding_matrix is not None:
#             self.embedding = tf.Variable(self.embedding_matrix, trainable=True, name="word_emb", dtype=tf.float32)
#         else:
#             self.embedding = tf.get_variable("word_emb", [self.num_words, self.input_dim])
#         self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)
#
# def add_feature_extractor_op(self):
#     with tf.variable_scope("char_bi-lstm"):
#         self.char_inputs_emb = tf.transpose(self.char_inputs_emb, [1, 0, 2])
#         self.char_inputs_emb = tf.reshape(self.char_inputs_emb, [-1, self.char_input_dim])
#         self.char_inputs_emb = tf.split(self.char_inputs_emb, self.word_len, 0)
#         # char lstm cell
#         char_lstm_cell_fw = rnn.LSTMCell(self.char_hidden_dim)
#         char_lstm_cell_bw = rnn.LSTMCell(self.char_hidden_dim)
#         # get the length of each word
#         self.word_length = tf.reduce_sum(tf.sign(self.char_inputs), reduction_indices=1)
#         self.word_length = tf.cast(self.word_length, tf.int32)
#         char_outputs, f_output, r_output = tf.contrib.rnn.static_bidirectional_rnn(
#             char_lstm_cell_fw,
#             char_lstm_cell_bw,
#             self.char_inputs_emb,
#             dtype=tf.float32,
#             sequence_length=self.word_length
#         )
#         final_word_output = tf.concat([f_output.h, r_output.h], -1)
#         self.word_lstm_last_output = tf.reshape(final_word_output, [-1, self.num_steps, self.char_hidden_dim*2])
#     with tf.variable_scope("word_bi-lstm"):
#         self.inputs_emb = tf.concat([self.inputs_emb, self.word_lstm_last_output], -1)
#         self.inputs_emb = tf.nn.dropout(self.inputs_emb, self.keep_prob)
#         self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2])
#         self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.input_dim+self.char_hidden_dim*2])
#         # self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.input_dim])
#         self.inputs_emb = tf.split(self.inputs_emb, self.num_steps, 0)
#         # word lstm cell
#         lstm_cell_fw = rnn.LSTMCell(self.hidden_dim)
#         lstm_cell_bw = rnn.LSTMCell(self.hidden_dim)
#         # get the length of each sample
#         self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1)
#         self.length = tf.cast(self.length, tf.int32)
#         self.outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
#             lstm_cell_fw,
#             lstm_cell_bw,
#             self.inputs_emb,
#             dtype=tf.float32,
#             sequence_length=self.length
#         )
#     with tf.variable_scope("bidirect-concat"):
#         final_outputs = tf.reshape(tf.concat(self.outputs, 1), [-1, self.hidden_dim * 2])
#         tanh_layer_w = tf.get_variable("tanh_layer_w", [self.hidden_dim * 2, self.hidden_dim])
#         tanh_layer_b = tf.get_variable("tanh_layer_b", [self.hidden_dim])
#         self.final_outputs = tf.tanh(tf.matmul(final_outputs, tanh_layer_w) + tanh_layer_b)
#
# def forward(self):
#     self.add_placeholders()
#     self.add_lookup_op()
#     self.add_feature_extractor_op()
#     return self.final_outputs, self.length
def run(self,
        *in_arrays: Tuple[Union[np.ndarray, None], ...],
        input_transform: dict = None,
        output_transform: dict = None,
        return_as_list: bool = False,
        print_progress: bool = False,
        minibatch_size: int = None,
        num_gpus: int = 1,
        assume_frozen: bool = False,
        **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
    """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

    Args:
        in_arrays:        One array per network input. Individual entries may be None (but not all of them);
                          a None entry is replaced by zeros shaped after the matching `self.input_shapes` entry.
        input_transform:  A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                          The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                          TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                          The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                          TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        return_as_list:   True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
        print_progress:   Print progress to the console? Useful for very large input arrays.
        minibatch_size:   Maximum minibatch size to use, None = disable batching.
        num_gpus:         Number of GPUs to use.
        assume_frozen:    Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
        dynamic_kwargs:   Additional keyword arguments to be passed into the network build function.

    Returns:
        The network output(s) for all input items: a list when `return_as_list` is True, otherwise a single
        array (one output) or a tuple of arrays (several outputs).
    """
    assert len(in_arrays) == self.num_inputs
    assert not all(arr is None for arr in in_arrays)
    assert input_transform is None or util.is_top_level_function(input_transform["func"])
    assert output_transform is None or util.is_top_level_function(output_transform["func"])
    output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)

    # Any input may be None, so take the item count from the first array that is actually present
    # (the assert above guarantees there is at least one).
    num_items = next(arr.shape[0] for arr in in_arrays if arr is not None)
    if minibatch_size is None:
        minibatch_size = num_items

    # Construct unique hash key from all arguments that affect the TensorFlow graph.
    key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus,
               assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)

    def unwind_key(obj):
        # Turn nested dicts into sorted (name, value) lists and callables into their names,
        # so that repr() of the result is stable across calls.
        if isinstance(obj, dict):
            return [(name, unwind_key(value)) for name, value in sorted(obj.items())]
        if callable(obj):
            return util.get_top_level_function_name(obj)
        return obj

    key = repr(unwind_key(key))

    # Build graph.
    if key not in self._run_cache:
        with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
            with tf.device("/cpu:0"):
                in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
                in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))

            out_split = []
            for gpu in range(num_gpus):
                with tf.device("/gpu:%d" % gpu):
                    net_gpu = self.clone() if assume_frozen else self
                    in_gpu = in_split[gpu]

                    if input_transform is not None:
                        in_kwargs = dict(input_transform)
                        in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
                        in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)

                    # assert len(in_gpu) == self.num_inputs
                    out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)

                    if output_transform is not None:
                        out_kwargs = dict(output_transform)
                        out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
                        out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)

                    # assert len(out_gpu) == self.num_outputs
                    out_split.append(out_gpu)

            with tf.device("/cpu:0"):
                out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
                self._run_cache[key] = in_expr, out_expr

    # Run minibatches.
    in_expr, out_expr = self._run_cache[key]
    out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]

    for mb_begin in range(0, num_items, minibatch_size):
        if print_progress:
            print("\r%d / %d" % (mb_begin, num_items), end="")

        mb_end = min(mb_begin + minibatch_size, num_items)
        mb_num = mb_end - mb_begin
        # Missing (None) inputs are fed as zeros of the declared input shape.
        mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:])
                 for src, shape in zip(in_arrays, self.input_shapes)]
        mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))

        for dst, src in zip(out_arrays, mb_out):
            dst[mb_begin: mb_end] = src

    # Done.
    if print_progress:
        print("\r%d / %d" % (num_items, num_items))

    if not return_as_list:
        out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
    return out_arrays
def __init__(self, vgg16_npy_path=None, restore_from=None):
    """Build the VGG16-based regression graph and open a session.

    Args:
        vgg16_npy_path: Path of the ``.npy`` file holding the pre-trained
            VGG16 parameters; loaded with ``np.load(...).item()``.
        restore_from: Checkpoint path handed to ``tf.train.Saver.restore``.
            When falsy, the loss and training op are created instead and
            all global variables are initialized.

    Raises:
        FileNotFoundError: If ``vgg16_npy_path`` does not exist. The download
            hint is printed first, then the error is re-raised, because
            ``self.data_dict`` would otherwise be left unset.
    """
    # pre-trained parameters
    try:
        # The parameter file is a pickled dict, so recent NumPy releases need
        # allow_pickle=True. NOTE: unpickling executes code from the file --
        # only load parameter files from a trusted source.
        self.data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
    except FileNotFoundError:
        print(
            'Please download VGG16 parameters at here https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM'
        )
        raise

    self.tfx = tf.placeholder(tf.float32, [None, 224, 224, 3])
    self.tfy = tf.placeholder(tf.float32, [None, 1])

    # Convert RGB to BGR
    red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=self.tfx * 255.0)
    bgr = tf.concat(axis=3, values=[
        blue - self.vgg_mean[0],
        green - self.vgg_mean[1],
        red - self.vgg_mean[2],
    ])

    # pre-trained VGG layers are fixed in fine-tune
    conv1_1 = self.conv_layer(bgr, "conv1_1")
    conv1_2 = self.conv_layer(conv1_1, "conv1_2")
    pool1 = self.max_pool(conv1_2, 'pool1')

    conv2_1 = self.conv_layer(pool1, "conv2_1")
    conv2_2 = self.conv_layer(conv2_1, "conv2_2")
    pool2 = self.max_pool(conv2_2, 'pool2')

    conv3_1 = self.conv_layer(pool2, "conv3_1")
    conv3_2 = self.conv_layer(conv3_1, "conv3_2")
    conv3_3 = self.conv_layer(conv3_2, "conv3_3")
    pool3 = self.max_pool(conv3_3, 'pool3')

    conv4_1 = self.conv_layer(pool3, "conv4_1")
    conv4_2 = self.conv_layer(conv4_1, "conv4_2")
    conv4_3 = self.conv_layer(conv4_2, "conv4_3")
    pool4 = self.max_pool(conv4_3, 'pool4')

    conv5_1 = self.conv_layer(pool4, "conv5_1")
    conv5_2 = self.conv_layer(conv5_1, "conv5_2")
    conv5_3 = self.conv_layer(conv5_2, "conv5_3")
    pool5 = self.max_pool(conv5_3, 'pool5')

    # detach original VGG fc layers and
    # reconstruct your own fc layers serve for your own purpose
    self.flatten = tf.reshape(pool5, [-1, 7 * 7 * 512])
    self.fc6 = tf.layers.dense(self.flatten, 256, tf.nn.relu, name='fc6')
    self.out = tf.layers.dense(self.fc6, 1, name='out')

    self.sess = tf.Session()
    if restore_from:
        saver = tf.train.Saver()
        saver.restore(self.sess, restore_from)
    else:
        # training graph
        self.loss = tf.losses.mean_squared_error(labels=self.tfy, predictions=self.out)
        self.train_op = tf.train.RMSPropOptimizer(0.001).minimize(self.loss)
        self.sess.run(tf.global_variables_initializer())
def extract_features(self,
                     preprocessed_inputs,
                     state_saver=None,
                     state_name='lstm_state',
                     unroll_length=10,
                     scope=None):
    """Extract features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <scope>/MobilenetV2_1/...
    <scope>/MobilenetV2_2/...
    <scope>/LSTM/...
    <scope>/FeatureMap/...

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: Python string, the name to use with the state_saver.
      unroll_length: number of steps to unroll the lstm.
      scope: Scope for the base network of the feature extractor.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if interleave_method not recognized or large and small base
        network output feature maps of different sizes.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    preprocessed_inputs = ops.pad_to_multiple(
        preprocessed_inputs, self._pad_to_multiple)
    # Integer division: the per-step batch size must stay an int under
    # Python 3 as well (true division would hand a float to the LSTM cell).
    batch_size = preprocessed_inputs.shape[0].value // unroll_length
    batch_axis = 0
    nets = []

    # Batch processing of mobilenet features.
    with slim.arg_scope(mobilenet_v2.training_scope(
            is_training=self._is_training,
            bn_decay=0.9997)), \
        slim.arg_scope([mobilenet.depth_multiplier],
                       min_depth=self._min_depth, divisible_by=8):
        # Big model.
        net, _ = self.extract_base_features_large(preprocessed_inputs)
        nets.append(net)
        large_base_feature_shape = net.shape

        # Small models
        net, _ = self.extract_base_features_small(preprocessed_inputs)
        nets.append(net)
        small_base_feature_shape = net.shape

        # Both base networks must agree on the spatial size of their output.
        if not (large_base_feature_shape[1] == small_base_feature_shape[1] and
                large_base_feature_shape[2] == small_base_feature_shape[2]):
            raise ValueError('Large and Small base network feature map dimension '
                             'not equal!')

    with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
            output_size = (large_base_feature_shape[1],
                           large_base_feature_shape[2])
            lstm_cell, init_state, step = self.create_lstm_cell(
                batch_size, output_size, state_saver, state_name)

            # Split each base feature tensor into `unroll_length` pieces
            # along the batch axis, one piece per LSTM step.
            nets_seq = [
                tf.split(net, unroll_length, axis=batch_axis) for net in nets
            ]

            net_seq, states_out = rnn_decoder.multi_input_rnn_decoder(
                nets_seq,
                init_state,
                lstm_cell,
                step,
                selection_strategy=self._interleave_method,
                is_training=self._is_training,
                pre_bottleneck=self._pre_bottleneck,
                flatten_state=self._flatten_state,
                scope=lstm_scope)
            self._states_out = states_out

        batcher_ops = None
        if state_saver is not None:
            self._step = state_saver.state(state_name + '_step')
            batcher_ops = [
                state_saver.save_state(state_name + '_c', states_out[-1][0]),
                state_saver.save_state(state_name + '_h', states_out[-1][1]),
                state_saver.save_state(state_name + '_step', self._step + 1)]
        image_features = {}
        # The state-saving ops (if any) are made dependencies of the output.
        with tf_ops.control_dependencies(batcher_ops):
            image_features['layer_19'] = tf.concat(net_seq, 0)

        # SSD layers.
        with tf.variable_scope('FeatureMap'):
            feature_maps = feature_map_generators.multi_resolution_feature_maps(
                feature_map_layout=self._feature_map_layout,
                depth_multiplier=self._depth_multiplier,
                min_depth=self._min_depth,
                insert_1x1_conv=True,
                image_features=image_features,
                pool_residual=True)

    # Return a real list, as documented, rather than a dict view.
    return list(feature_maps.values())
def forward(self, x):
    """Build the network graph for a stack of frames and return the output.

    `x` is unpacked into five dimensions; the second one is treated as the
    frame axis. The frames are concatenated on the channel axis, passed
    through a non-local block (added back residually), split per frame
    again, run through `self.num_block` fusion blocks, merged, reduced by
    four output convolutions and finally added to the center frame.

    Reads `self.num_block`, `self.num_frames` and `self.refactor`.

    Returns:
        The sum of the network output and the center frame, stacked on a new
        axis 1 (op name 'out').
    """

    def _NonLocalBlock(input_x,
                       out_channels,
                       sub_sample=1,
                       nltype=0,
                       is_bn=False,
                       scope='NonLocalBlock'):
        """Non-local block; returns the output of the final 1x1 'w' conv.

        Based on https://github.com/nnUyi/Non-Local_Nets-Tensorflow

        `nltype` selects the pairwise function (see `typedict`). For nltype 1
        `theta`/`phi` are the raw input, so `out_channels` has to match the
        input channel count for the reshapes below.
        NOTE(review): `is_bn` and `typedict` are unused, and nltype > 2 does
        not appear to be supported (nothing is built for it) -- confirm.
        NOTE(review): the residual `input_x + w_y` is commented out here; the
        caller adds the residual itself.
        """
        batchsize, height, width, in_channels = input_x.get_shape(
        ).as_list()
        typedict = {
            0: 'embedded_gaussian',
            1: 'gaussian',
            2: 'dot_product',
            3: 'concat'
        }
        with tf.variable_scope(scope) as sc:
            if nltype <= 2:
                with tf.variable_scope('g') as scope:
                    g = conv2d(input_x,
                               out_channels,
                               1,
                               strides=1,
                               padding='same',
                               name='g')
                    if sub_sample > 1:
                        g = average_pooling2d(g,
                                              pool_size=sub_sample,
                                              strides=sub_sample,
                                              name='g_pool')
                with tf.variable_scope('phi') as scope:
                    if nltype == 0 or nltype == 2:
                        phi = conv2d(input_x,
                                     out_channels,
                                     1,
                                     strides=1,
                                     padding='same',
                                     name='phi')
                    elif nltype == 1:
                        phi = input_x
                    if sub_sample > 1:
                        phi = average_pooling2d(phi,
                                                pool_size=sub_sample,
                                                strides=sub_sample,
                                                name='phi_pool')
                with tf.variable_scope('theta') as scope:
                    if nltype == 0 or nltype == 2:
                        theta = conv2d(input_x,
                                       out_channels,
                                       1,
                                       strides=1,
                                       padding='same',
                                       name='theta')
                    elif nltype == 1:
                        theta = input_x

                # Flatten the spatial dimensions: (batch, positions, channels).
                g_x = tf.reshape(g, [batchsize, -1, out_channels])
                theta_x = tf.reshape(theta, [batchsize, -1, out_channels])
                # theta_x = tf.reshape(theta, [batchsize, out_channels, -1])
                # theta_x = tf.transpose(theta_x, [0,2,1])
                phi_x = tf.reshape(phi, [batchsize, -1, out_channels])
                phi_x = tf.transpose(phi_x, [0, 2, 1])
                #phi_x = tf.reshape(phi_x, [batchsize, out_channels, -1])

                # Pairwise similarity between all positions.
                f = tf.matmul(theta_x, phi_x)
                # ???
                if nltype <= 1:
                    # Hand-written softmax over the last axis.
                    # f_softmax = tf.nn.softmax(f, -1)
                    f = tf.exp(f)
                    f_softmax = f / tf.reduce_sum(
                        f, axis=-1, keepdims=True)
                elif nltype == 2:
                    # ReLU, then normalize by the row sum (the variable is
                    # called f_mean but holds a sum).
                    f = tf.nn.relu(f)  #/int(f.shape[-1])
                    f_mean = tf.reduce_sum(f, axis=[2], keepdims=True)
                    #print(f.shape,f_mean.shape)
                    f_softmax = f / f_mean
                y = tf.matmul(f_softmax, g_x)
                y = tf.reshape(y, [batchsize, height, width, out_channels])
            with tf.variable_scope('w') as scope:
                # 1x1 conv mapping back to the input channel count.
                w_y = conv2d(y,
                             in_channels,
                             1,
                             strides=1,
                             padding='same',
                             name='w')
                # if is_bn:
                #     w_y = slim.batch_norm(w_y)
            z = w_y  #input_x + w_y
            return z

    """ hyper-paras """
    mf = 64  # output feature map num for most convs
    dk = 3  # kernel size for most convs
    ds = 1  # stride for most convs
    activate = tf.nn.leaky_relu
    num_block = self.num_block  # progressive fusion block num
    ki = tf.contrib.layers.xavier_initializer()
    # NOTE(review): only nf and c are used below; the names w/h are in the
    # opposite order to the (bs, h, w, c) comments further down -- confirm
    # the actual layout with the caller.
    n, nf, w, h, c = x.shape
    # print(n, nf, w, h, c)

    with tf.variable_scope('network', reuse=tf.AUTO_REUSE) as scope:
        # Layer objects are created once up front; conv1/conv10/conv2 hold
        # one layer per fusion block.
        conv0 = Conv2D(mf,
                       5,
                       strides=ds,
                       padding='same',
                       activation=activate,
                       kernel_initializer=ki,
                       name='conv0')
        conv1 = [
            Conv2D(mf,
                   dk,
                   strides=ds,
                   padding='same',
                   activation=activate,
                   kernel_initializer=ki,
                   name='conv1_{}'.format(i)) for i in range(num_block)
        ]
        conv10 = [
            Conv2D(mf,
                   1,
                   strides=ds,
                   padding='same',
                   activation=activate,
                   kernel_initializer=ki,
                   name='conv10_{}'.format(i)) for i in range(num_block)
        ]
        conv2 = [
            Conv2D(mf,
                   dk,
                   strides=ds,
                   padding='same',
                   activation=activate,
                   kernel_initializer=ki,
                   name='conv2_{}'.format(i)) for i in range(num_block)
        ]
        convout4 = Conv2D(256,
                          3,
                          strides=ds,
                          padding='same',
                          activation=activate,
                          kernel_initializer=ki,
                          name='convout4')
        convout3 = Conv2D(128,
                          3,
                          strides=ds,
                          padding='same',
                          activation=activate,
                          kernel_initializer=ki,
                          name='convout3')
        convout2 = Conv2D(64,
                          3,
                          strides=ds,
                          padding='same',
                          activation=activate,
                          kernel_initializer=ki,
                          name='convout2')
        # Last conv has 3 output channels and no activation.
        convout1 = Conv2D(3,
                          3,
                          strides=ds,
                          padding='same',
                          activation=None,
                          kernel_initializer=ki,
                          name='convout1')
        """ center for residual add """
        center_tensor = x[:, self.num_frames //
                          2, :, :, :]  # (bs, h, w, c)
        # print(center_tensor.get_shape())
        """ nonlocal + res """
        inp0 = [x[:, i, :, :, :]
                for i in range(nf)]  # [nf * (bs, h, w, c)]
        inp0 = tf.concat(inp0, axis=-1)  # (bs, h, w, c*nf)
        # print(inp0.get_shape())
        if self.refactor > 1:
            inp1 = tf.space_to_depth(
                inp0, self.refactor
            )  # space2depth: (h, w, c) -> (h//2, w//2, c*4)
        else:
            inp1 = inp0
        # nltype=1 uses the raw input as theta/phi, hence the channel count
        # passed here equals the channel count of inp1.
        inp1 = _NonLocalBlock(inp1,
                              int(c) * self.num_frames * self.refactor *
                              self.refactor,
                              sub_sample=1,
                              nltype=1,
                              scope='nlblock_{}'.format(0))
        if self.refactor > 1:
            inp1 = tf.depth_to_space(
                inp1, self.refactor
            )  # depth2space: (h//2, w//2, c*4) -> (h, w, c)
        inp0 += inp1  # (bs, h, w, c*nf)
        """ 5x5 conv """
        inp0 = tf.split(inp0, num_or_size_splits=self.num_frames,
                        axis=-1)  # [nf * (bs, h, w, c)]
        # print(len(inp0))
        # print(inp0[0].get_shape())
        # The same conv0 layer object is applied to every frame.
        inp0 = [conv0(f) for f in inp0]  # [nf * (bs, h, w, 64)]
        # print(inp0[0].get_shape())
        """ progressive fusion blocks """
        for i in range(num_block):
            inp1 = [conv1[i](f) for f in inp0]  # [nf * (bs, h, w, 64)]
            base = tf.concat(inp1, axis=-1)  # (bs, h, w, 64*nf)
            base = conv10[i](base)
            # Each frame's features are concatenated with the fused base.
            inp2 = [tf.concat([base, f], -1) for f in inp1]
            inp2 = [conv2[i](f) for f in inp2]
            # Residual connection around the block, per frame.
            inp0 = [tf.add(inp0[j], inp2[j])
                    for j in range(nf)]  # [nf * (bs, h, w, 64)]
        """ merge """
        merge = tf.concat(inp0, axis=-1)  # (bs, h, w, 64*nf)
        out = convout4(merge)
        out = convout3(out)
        out = convout2(out)
        out = convout1(out)  # (bs, h, w, c)
        """ residual """
        return tf.stack([out + center_tensor], axis=1,
                        name='out')  # (bs, h, w, c) -> (bs, 1, h, w, c)
def _build_graph(self, inputs):
    """Build the motion-estimation / SPMC / detail-fusion graph, its losses and summaries.

    `inputs` is unpacked into a low-resolution and a high-resolution RGB
    tensor. The selected training cost is stored in `self.cost`
    (`costs[self.stage - 1]`); the prediction, the flows and the warped
    frames are exposed through the named ops 'predictions', 'flows' and
    'after_warp'.

    NOTE(review): `h`, `w`, `cfg` and `k_range` are not defined in this
    method and are presumably module-level names -- confirm.
    """
    # ========================== Convert Color Space ==========================
    lr_rgb, hr_rgb = inputs
    lr_y, hr_y = rgb2y(lr_rgb), rgb2y(hr_rgb)
    lr_ycbcr, hr_ycbcr = rgb2ycbcr(lr_rgb), rgb2ycbcr(hr_rgb)
    # (b, t, h, w, c) to (b, h, w, c) * t
    lr_y = tf.split(lr_y, cfg.frames, axis = 1)
    lr_y = [tf.reshape(i, (-1, h, w, 1)) for i in lr_y]
    lr_rgb = tf.split(lr_rgb, cfg.frames, axis = 1)
    lr_rgb = [tf.reshape(i, (-1, h, w, 3)) for i in lr_rgb]
    lr_ycbcr = tf.split(lr_ycbcr, cfg.frames, axis = 1)
    lr_ycbcr = [tf.reshape(i, (-1, h, w, 3)) for i in lr_ycbcr]
    # ========================== split ==========================

    # ========================== Normalization ==========================
    # Map pixel values from [0, 255] to [-0.5, 0.5].
    lr_y = [i / 255.0 - 0.5 for i in lr_y]
    lr_rgb = [i / 255.0 - 0.5 for i in lr_rgb]
    lr_ycbcr = [i / 255.0 - 0.5 for i in lr_ycbcr]
    hr_y = hr_y / 255.0 - 0.5
    # The middle frame of the sequence serves as the reference frame.
    referenced_rgb = lr_rgb[cfg.frames // 2]
    referenced_y = lr_y[cfg.frames // 2]
    ref_ycbcr = lr_ycbcr[cfg.frames // 2]

    # ========================== Forward ==========================
    hr_sparses = []
    flows = []
    warped = []
    coords = get_coords(h, w)
    with tf.variable_scope("ME_SPMC") as scope:
        # NOTE: `j` (the RGB frame) is not used inside the loop.
        for i, j in zip(lr_y, lr_rgb):
            flow_i0 = motion_estimation(referenced_y, i) * h / 2
            # freeze in stage 2
            if self.stage == 2:
                flow_i0 = tf.stop_gradient(flow_i0)
            flows.append(flow_i0)
            hr_sparses.append(spmc_layer(i, flow_i0))
            mapping = coords - flow_i0
            backward_warped_img = BackwardWarping('backward_warpped', [referenced_y, mapping], borderMode='constant')
            warped.append(backward_warped_img)
            # Share the variables created for the first frame with all later frames.
            scope.reuse_variables()
    # Only the first (Y) channel of the reference frame is passed on.
    hr_denses = detail_fusion_net(hr_sparses, ref_ycbcr[:, :, :, :1])

    # ========================== Outputs ==========================
    flow_after_reshape = [tf.reshape(i, (-1, 1, h, w, 2)) for i in flows]
    tf_flows = tf.concat(flow_after_reshape, axis = 1, name = 'flows')
    warped_after_reshape = [tf.reshape(i, (-1, 1, h, w, 1)) for i in warped]
    after_warp = tf.concat(warped_after_reshape, axis = 1, name = 'after_warp')
    # Amount of padding needed to round h and w up to a multiple of 4.
    padh = int(math.ceil(h / 4.0) * 4.0 - h)
    padw = int(math.ceil(w / 4.0) * 4.0 - w)
    scale_factor = 2
    # Denormalization
    output_y = (hr_denses[-1] + 0.5) * 255.
    # Denormalization, bicubic interpolation
    output_cbcr = tf.image.resize_images(
        (ref_ycbcr + 0.5) * 255.0,
        [(h + padh) * scale_factor, (w + padw) * scale_factor],
        method = 2)[:, :, :, 1:3]
    # Y: model output; Cb & Cr: bicubic interpolation
    prediction = tf.concat([output_y, output_cbcr], axis = -1)
    # convert YCbCr to RGB
    prediction = tf.identity(ycbcr2rgb(prediction), name = 'predictions')

    # ========================== Cost Functions ==========================
    # Per-frame weights applied to the reconstruction losses below.
    k = np.arange(*k_range, 0.5 / cfg.frames)
    mask_warped = []
    warp_loss = []
    mask_warp_loss = []
    flow_loss = []
    euclidean_loss = []
    for i in range(cfg.frames):
        mapping = coords - flows[i]
        # Mask of the positions whose mapped coordinates stay inside the frame
        # (channel 0 is checked against h, the remaining channel against w).
        mask1 = tf.greater_equal(mapping[:,:,:,:1], 0.0)
        mask2 = tf.less_equal(mapping[:,:,:,:1], h-1)
        mask3 = tf.greater_equal(mapping[:,:,:,1:], 0.0)
        mask4 = tf.less_equal(mapping[:,:,:,1:], w-1)
        mask12 = tf.logical_and(mask1, mask2)
        mask34 = tf.logical_and(mask3, mask4)
        mask = tf.cast(tf.logical_and(mask12, mask34), tf.float32)

        mask_warped.append(self._unorm(warped[i], mask))
        # Masked L1 warp error, divided by the mask sum and multiplied by the mask size.
        mask_warp_loss.append(tf.reduce_sum(mask * tf.abs(lr_y[i] - warped[i])) / tf.reduce_sum(mask) * tf.reduce_sum(tf.ones_like(mask)))
        warp_loss.append(tf.reduce_sum(tf.abs(lr_y[i] - warped[i])))
        flow_loss.append(tf.reduce_sum(tf.abs(tf.image.total_variation(flows[i]))))
        euclidean_loss.append(tf.reduce_sum(tf.square(hr_y - hr_denses[i])))

    loss_me_1 = tf.reduce_sum([mask_warp_loss[i] for i in range(cfg.frames)])
    loss_me_2 = tf.reduce_sum([flow_loss[i] for i in range(cfg.frames)])
    loss_me = loss_me_1 + cfg.lambda1 * loss_me_2

    loss_sr = tf.reduce_sum([k[i] * euclidean_loss[i] for i in range(cfg.frames)])

    # One cost per training stage (1-based): ME only, SR only, joint.
    costs = [
        loss_me,
        loss_sr,
        loss_sr + cfg.lambda2 * loss_me
    ]
    self.cost = tf.identity(costs[self.stage - 1], name = 'cost')

    # ========================================== Summary ==========================================
    tf.summary.image('input', referenced_rgb, max_outputs = 3)
    tf.summary.image('groundtruth', hr_rgb, max_outputs=3)
    # NOTE: the two frame-pair summaries index frames 0 and 1 directly.
    tf.summary.image('frame_pair_1', tf.concat([self._unorm(referenced_y), self._unorm(lr_y[0]), mask_warped[0]], axis=1), max_outputs=3)
    tf.summary.image('frame_pair_2', tf.concat([self._unorm(referenced_y), self._unorm(lr_y[1]), mask_warped[1]], axis=1), max_outputs=3)
    tf.summary.image('flow', flow_to_color(flows[0]), max_outputs=3)
    # tf.summary.image('flow_1', tf.concat([flows[0][:,:,:,:1], flows[0][:,:,:,1:]], axis=1), max_outputs=3)
    # tf.summary.image('flow_2', tf.concat([flows[1][:,:,:,:1], flows[1][:,:,:,1:]], axis=1), max_outputs=3)
    # tf.summary.image('reference_frame', referenced, max_outputs=3)
    tf.summary.image('output', prediction, max_outputs=3)
    add_moving_summary(
        # tf.identity(loss_me_1, name = 'warp_loss'),
        # tf.identity(loss_me_2, name = 'flow_loss'),
        tf.identity(loss_me, name = 'loss_me'),
        tf.identity(loss_sr, name = 'loss_sr'),
        self.cost
    )
def _split_tensor(values, num_splits, axis):
    """Split `values` into `num_splits` pieces along `axis` on old and new TensorFlow.

    Args:
        values: Tensor to split.
        num_splits: Number of pieces.
        axis: Dimension to split along.

    Returns:
        Whatever `tf.split` returns for the installed TensorFlow version.
    """
    # Every release before 1.0 (not just 0.12.0) uses the legacy argument
    # order tf.split(split_dim, num_split, value), so compare the major
    # version instead of one exact version string.
    major_version = int(tf.__version__.split('.')[0])
    if major_version < 1:
        return tf.split(axis, num_splits, values)
    return tf.split(values, num_splits, axis=axis)
def run(
        self,
        *in_arrays,
        return_as_list=False,
        print_progress=False,
        minibatch_size=None,
        num_gpus=1,
        out_mul=1.0,
        out_add=0.0,
        out_shrink=1,
        out_dtype=None,
        **dynamic_kwargs):
    """Evaluate the network on NumPy inputs and return NumPy outputs.

    Args:
        in_arrays: One NumPy array per network input.
        return_as_list: True = return a list of NumPy arrays, False = return
            a single NumPy array, or a tuple if there are multiple outputs.
        print_progress: Print progress to the console? Useful for very large
            input arrays.
        minibatch_size: Maximum minibatch size to use, None = disable batching.
        num_gpus: Number of GPUs to use.
        out_mul: Multiplicative constant to apply to the output(s).
        out_add: Additive constant to apply to the output(s).
        out_shrink: Shrink the spatial dimensions of the output(s) by the
            given factor.
        out_dtype: Convert the output to the specified data type.
        dynamic_kwargs: Additional keyword arguments to pass into the network
            construction function.

    Returns:
        The outputs for all items, packaged according to `return_as_list`.
    """
    assert len(in_arrays) == self.num_inputs
    total = in_arrays[0].shape[0]
    step_size = total if minibatch_size is None else minibatch_size

    # The output graph is cached per combination of graph-affecting arguments.
    cache_key = str([
        list(sorted(dynamic_kwargs.items())), num_gpus, out_mul, out_add,
        out_shrink, out_dtype
    ])

    # Build graph.
    if cache_key not in self._run_cache:
        with absolute_name_scope(self.scope + '/Run'), tf.control_dependencies(None):
            per_gpu_inputs = list(
                zip(*[tf.split(template, num_gpus) for template in self.input_templates]))
            per_gpu_outputs = []
            for gpu_index in range(num_gpus):
                with tf.device('/gpu:%d' % gpu_index):
                    tensors = self.get_output_for(*per_gpu_inputs[gpu_index],
                                                  return_as_list=True,
                                                  **dynamic_kwargs)
                    # Optional post-processing: scale, shift, shrink, cast.
                    if out_mul != 1.0:
                        tensors = [t * out_mul for t in tensors]
                    if out_add != 0.0:
                        tensors = [t + out_add for t in tensors]
                    if out_shrink > 1:
                        window = [1, 1, out_shrink, out_shrink]
                        tensors = [
                            tf.nn.avg_pool(t,
                                           ksize=window,
                                           strides=window,
                                           padding='VALID',
                                           data_format='NCHW')
                            for t in tensors
                        ]
                    if out_dtype is not None:
                        # Round before casting to an integer type.
                        if tf.as_dtype(out_dtype).is_integer:
                            tensors = [tf.round(t) for t in tensors]
                        tensors = [tf.saturate_cast(t, out_dtype) for t in tensors]
                    per_gpu_outputs.append(tensors)
            self._run_cache[cache_key] = [
                tf.concat(parts, axis=0) for parts in zip(*per_gpu_outputs)
            ]

    # Run minibatches.
    graph_outputs = self._run_cache[cache_key]
    results = [
        np.empty([total] + shape_to_list(tensor.shape)[1:], tensor.dtype.name)
        for tensor in graph_outputs
    ]
    for start in range(0, total, step_size):
        if print_progress:
            print('\r%d / %d' % (start, total), end='')
        stop = min(start + step_size, total)
        feed = dict(zip(self.input_templates,
                        [array[start:stop] for array in in_arrays]))
        batch_results = tf.get_default_session().run(graph_outputs, feed)
        for target, values in zip(results, batch_results):
            target[start:stop] = values

    # Done.
    if print_progress:
        print('\r%d / %d' % (total, total))
    if return_as_list:
        return results
    if len(results) == 1:
        return results[0]
    return tuple(results)
def evaluate(n_token, cutoffs, ps_device):
    """Evaluate a checkpoint on the eval split and log loss, perplexity and bpc.

    Args:
        n_token: Forwarded to `single_core_graph`.
        cutoffs: Forwarded to `single_core_graph`.
        ps_device: Forwarded to `assign_to_gpu` when placing each tower.

    The checkpoint is `FLAGS.eval_ckpt_path`, or the latest checkpoint in
    `FLAGS.model_dir` when that flag is None. Nothing is returned; the
    results are written with `tf.logging.info`.
    """
    ##### Get input function and model function
    eval_input_fn, eval_record_info = data_utils.get_input_fn(
        record_info_dir=FLAGS.record_info_dir,
        split=FLAGS.eval_split,
        per_host_bsz=FLAGS.eval_batch_size,
        tgt_len=FLAGS.tgt_len,
        num_core_per_host=FLAGS.num_core_per_host,
        num_hosts=1,
        use_tpu=False)

    num_batch = eval_record_info["num_batch"]
    if FLAGS.max_eval_batch > 0:
        num_batch = FLAGS.max_eval_batch
    tf.logging.info("num of batches {}".format(num_batch))

    ##### Create computational graph
    eval_set = eval_input_fn({
        "batch_size": FLAGS.eval_batch_size,
        "data_dir": FLAGS.data_dir
    })

    input_feed, label_feed = eval_set.make_one_shot_iterator().get_next()

    # One slice of the batch per core.
    inputs = tf.split(input_feed, FLAGS.num_core_per_host, 0)
    labels = tf.split(label_feed, FLAGS.num_core_per_host, 0)

    per_core_bsz = FLAGS.eval_batch_size // FLAGS.num_core_per_host
    tower_mems, tower_losses, tower_new_mems = [], [], []

    for i in range(FLAGS.num_core_per_host):
        with tf.device(assign_to_gpu(i, ps_device)), \
                tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            # One memory placeholder per layer, fed from the previous step.
            mems_i = [
                tf.placeholder(tf.float32,
                               [FLAGS.mem_len, per_core_bsz, FLAGS.d_model])
                for _ in range(FLAGS.n_layer)
            ]

            loss_i, new_mems_i = single_core_graph(n_token=n_token,
                                                   cutoffs=cutoffs,
                                                   is_training=False,
                                                   inp=inputs[i],
                                                   tgt=labels[i],
                                                   mems=mems_i)

            tower_mems.append(mems_i)
            tower_losses.append(loss_i)
            tower_new_mems.append(new_mems_i)

    ## average losses across towers
    if len(tower_losses) > 1:
        loss = tf.add_n(tower_losses) / len(tower_losses)
    else:
        loss = tower_losses[0]

    ##### Evaluation loop
    # Memories start as zeros and are replaced by the fetched ones each step.
    tower_mems_np = [[
        np.zeros([FLAGS.mem_len, per_core_bsz, FLAGS.d_model],
                 dtype=np.float32) for layer in range(FLAGS.n_layer)
    ] for core in range(FLAGS.num_core_per_host)]

    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        if FLAGS.eval_ckpt_path is None:
            eval_ckpt_path = tf.train.latest_checkpoint(FLAGS.model_dir)
        else:
            eval_ckpt_path = FLAGS.eval_ckpt_path
        tf.logging.info("Evaluate {}".format(eval_ckpt_path))
        saver.restore(sess, eval_ckpt_path)

        fetches = [loss, tower_new_mems, tf.size(label_feed)]

        format_str = "  >> processing batch {{:{0}d}}/{{:{0}d}} ..".format(
            len(str(num_batch)))

        # Log about ten times per run; clamp to 1 so that fewer than ten
        # batches does not turn the modulo below into a division by zero.
        log_interval = max(1, num_batch // 10)

        total_loss, total_cnt = 0, 0
        for step in range(num_batch):
            if step % log_interval == 0:
                tf.logging.info(format_str.format(step, num_batch))

            feed_dict = {}
            for i in range(FLAGS.num_core_per_host):
                for m, m_np in zip(tower_mems[i], tower_mems_np[i]):
                    feed_dict[m] = m_np

            fetched = sess.run(fetches, feed_dict=feed_dict)

            loss_np, tower_mems_np, cnt_np = fetched[:3]
            # Weight each batch loss by its number of label tokens.
            total_loss += loss_np * cnt_np
            total_cnt += cnt_np

        avg_loss = total_loss / total_cnt
        tf.logging.info("| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
            avg_loss, math.exp(avg_loss), avg_loss / math.log(2)))
def pretrain_model(epochs, batch_size, train_steps_per_epoch, save_dir):
    """Run contrastive pre-training and return both built models.

    Args:
        epochs: Number of epochs handed to the estimator.
        batch_size: Batch size of the data pipeline.
        train_steps_per_epoch: Training steps per epoch handed to the estimator.
        save_dir: Directory used by the `ModelSaver` trace.

    Returns:
        The pair `(model_con, model_finetune)` produced by `fe.build`; only
        `model_con` is updated by the network defined here.
    """

    def augmentation_ops(view_key):
        # Ops producing one randomly augmented view of "x" under `view_key`.
        return [
            RandomCrop(32, 32, image_in="x", image_out=view_key),
            Sometimes(HorizontalFlip(image_in=view_key, image_out=view_key), prob=0.5),
            Sometimes(ColorJitter(inputs=view_key,
                                  outputs=view_key,
                                  brightness=0.8,
                                  contrast=0.8,
                                  saturation=0.8,
                                  hue=0.2),
                      prob=0.8),
            Sometimes(ToGray(inputs=view_key, outputs=view_key), prob=0.2),
            Sometimes(GaussianBlur(inputs=view_key,
                                   outputs=view_key,
                                   blur_limit=(3, 3),
                                   sigma_limit=(0.1, 2.0)),
                      prob=0.5),
            ToFloat(inputs=view_key, outputs=view_key),
        ]

    # step 1: prepare dataset
    train_data, _ = load_data()
    pipeline_ops = [PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x")]
    # augmentation 1
    pipeline_ops += augmentation_ops("x_aug")
    # augmentation 2
    pipeline_ops += augmentation_ops("x_aug2")
    pipeline = fe.Pipeline(train_data=train_data, batch_size=batch_size, ops=pipeline_ops)

    # step 2: prepare network
    model_con, model_finetune = fe.build(model_fn=ResNet9, optimizer_fn=["adam", "adam"])
    # Both views go through the model as one batch and are split again after.
    network_ops = [
        LambdaOp(lambda x, y: tf.concat([x, y], axis=0), inputs=["x_aug", "x_aug2"], outputs="x_com"),
        ModelOp(model=model_con, inputs="x_com", outputs="y_com"),
        LambdaOp(lambda x: tf.split(x, 2, axis=0), inputs="y_com", outputs=["y_pred", "y_pred2"]),
        NTXentOp(arg1="y_pred", arg2="y_pred2", outputs=["NTXent", "logit", "label"]),
        UpdateOp(model=model_con, loss_name="NTXent"),
    ]
    network = fe.Network(ops=network_ops)

    # step 3: prepare estimator
    accuracy_trace = Accuracy(true_key="label",
                              pred_key="logit",
                              mode="train",
                              output_name="contrastive_accuracy")
    saver_trace = ModelSaver(model=model_con, save_dir=save_dir)
    estimator = fe.Estimator(pipeline=pipeline,
                             network=network,
                             epochs=epochs,
                             traces=[accuracy_trace, saver_trace],
                             train_steps_per_epoch=train_steps_per_epoch)
    estimator.fit()

    return model_con, model_finetune