def test_backward_grads_with_nativepy(self):
    """Compare `RevBlock.backward_grads` with gradients from `tf.GradientTape`.

    The test is skipped when no GPU is available.
    """
    if not tf.test.is_gpu_available():
        self.skipTest("GPU not available")

    feature_shape = (128, 8, 8)
    batch_shape = (16,) + feature_shape
    x = tf.random_normal(shape=batch_shape, dtype=tf.float64)
    dy = tf.random_normal(shape=batch_shape, dtype=tf.float64)
    dy1, dy2 = tf.split(dy, num_or_size_splits=2, axis=1)
    block = blocks.RevBlock(
        n_res=3,
        filters=128,
        strides=(1, 1),
        input_shape=feature_shape,
        fused=False,
        dtype=tf.float64)

    with tf.GradientTape() as tape:
        tape.watch(x)
        x1, x2 = tf.split(x, num_or_size_splits=2, axis=1)
        y1, y2 = block((x1, x2), training=True)
        y = tf.concat((y1, y2), axis=1)

    # Reference gradients from automatic differentiation.
    dx_true = tape.gradient(y, x, output_gradients=dy)

    # Gradients produced by the block's own backward pass.
    input_grads, _ = block.backward_grads(
        x=(x1, x2), y=(y1, y2), dy=(dy1, dy2), training=True)
    dx1, dx2 = input_grads
    dx = tf.concat((dx1, dx2), axis=1)

    # Every element must agree within the absolute tolerance.
    tolerance = 1e-5
    abs_error = tf.reshape(abs(dx - dx_true), [-1])
    assert all(abs_error < tolerance)
def make_net(self, input_images, input_measurements, input_actions, input_objectives, reuse=False):
    """Build the prediction network and return its outputs.

    Image, measurement and (optionally) objective inputs are encoded by
    separate sub-networks and concatenated. Two heads are built on top: a
    "val" head with `target_dim` outputs and an "adv" head with
    `target_dim` outputs per discrete action. The "adv" head is centred
    across actions before the "val" head is added to it.

    As a side effect, `self.fc_val_params` and `self.fc_adv_params` are set
    to copies of `self.fc_joint_params` with the last output size replaced.

    Args:
        input_images: image input fed to the convolutional encoder.
        input_measurements: measurement input fed to its own FC net.
        input_actions: tensor cast to bool and used as a mask over the
            predictions.
        input_objectives: objective input; only used when
            `self.fc_obj_params` is a numpy array.
        reuse: if true, variables of the current scope are reused.

    Returns:
        A pair `(pred_all, pred_relevant)`: predictions reshaped to
        [-1, num_actions, target_dim], and those selected by
        `input_actions`.

    Raises:
        Exception: if `self.random_objective_coeffs` is set but no
            objective network parameters are available.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    n_actions = len(self.net_discrete_actions)

    self.fc_val_params = np.copy(self.fc_joint_params)
    self.fc_val_params['out_dims'][-1] = self.target_dim
    self.fc_adv_params = np.copy(self.fc_joint_params)
    self.fc_adv_params['out_dims'][-1] = n_actions * self.target_dim

    # Per-modality encoders.
    p_img_conv = my_ops.conv_encoder(input_images, self.conv_params, 'p_img_conv', msra_coeff=0.9)
    p_img_fc = my_ops.fc_net(my_ops.flatten(p_img_conv), self.fc_img_params, 'p_img_fc', msra_coeff=0.9)
    p_meas_fc = my_ops.fc_net(input_measurements, self.fc_meas_params, 'p_meas_fc', msra_coeff=0.9)

    branches = [p_img_fc, p_meas_fc]
    has_objective_net = isinstance(self.fc_obj_params, np.ndarray)
    if has_objective_net:
        branches.append(
            my_ops.fc_net(input_objectives, self.fc_obj_params, 'p_obj_fc', msra_coeff=0.9))
    p_concat_fc = tf.concat(branches, 1)
    if not has_objective_net and self.random_objective_coeffs:
        raise Exception('Need fc_obj_params with randomized objectives')

    # Two heads on the joint representation.
    p_val_fc = my_ops.fc_net(p_concat_fc, self.fc_val_params, 'p_val_fc', last_linear=True, msra_coeff=0.9)
    p_adv_fc = my_ops.fc_net(p_concat_fc, self.fc_adv_params, 'p_adv_fc', last_linear=True, msra_coeff=0.9)

    adv_per_action = tf.reshape(p_adv_fc, [-1, n_actions, self.target_dim])
    # Subtract the mean over the action axis so the "adv" head is zero-mean.
    adv_centred = adv_per_action - tf.reduce_mean(adv_per_action, reduction_indices=1, keep_dims=True)
    pred_all = adv_centred + tf.reshape(p_val_fc, [-1, 1, self.target_dim])
    pred_relevant = tf.boolean_mask(pred_all, tf.cast(input_actions, tf.bool))

    return pred_all, pred_relevant
def wide_model(numeric_input, category_input, vocabs):
    """Build the wide part of the model and return its logits.

    Every categorical column gets its own 8-dimensional embedding table.
    The looked-up embeddings are concatenated with the numeric features and
    multiplied by a single-column weight matrix "W".

    Args:
        numeric_input: 2-D tensor of numeric features; its second dimension
            is read to size "W".
        category_input: 2-D tensor of category ids; after transposition,
            row `i` is looked up in embedding table `i`.
        vocabs: indexable collection of vocabulary sizes, one per
            categorical column.

    Returns:
        The result of multiplying the concatenated features by "W".
    """
    columns_first = tf.transpose(category_input)
    embedded_so_far = None

    # Embed each categorical column and append it to the running concat.
    for i in range(len(vocabs)):
        table = tf.get_variable(
            "wideem" + str(i),
            [vocabs[i], 8],
            initializer=tf.contrib.layers.xavier_initializer())

        # Pick the i-th column of the original category input.
        column_ids = tf.gather(columns_first, [i])[0]
        embedded_col = embedding_ops.embedding_lookup_unique(table, column_ids)

        if embedded_so_far is None:
            embedded_so_far = embedded_col
        else:
            embedded_so_far = tf.concat([embedded_so_far, embedded_col], 1)

    # The graph-level seed is set before "W" is created.
    tf.set_random_seed(1)
    w = tf.get_variable(
        "W",
        [numeric_input.shape[1] + embedded_so_far.shape[1], 1],
        initializer=tf.contrib.layers.xavier_initializer())
    return tf.matmul(tf.concat([numeric_input, embedded_so_far], 1), w)
def _construct(self):
    """Build the scoring graph, the loss and the training op.

    The same memory layer and output module score a positive
    (user, item) pair and a negative (user, negative item) pair. Both
    scores are passed to `LossLayer`. The positive score is stored on
    `self.score` and added to the `GraphKeys.PREDICTION` collection.
    """
    user = self._cur_user
    pos_item = self._cur_item
    neg_item = self._cur_item_negative

    # Queries are (user, item) pairs.
    query = (user, pos_item)
    neg_query = (user, neg_item)

    # Positive branch: output of the last element the memory layer returns.
    pos_memory = self.user_memory(self.input_neighborhoods)
    pos_output = self.user_output(self.input_neighborhoods)
    pos_layers = self._mem_layer(query,
                                 pos_memory,
                                 pos_output,
                                 self.input_neighborhood_lengths,
                                 self.config.max_neighbors)
    neighbor = pos_layers[-1].output
    self.score = self._output_module(
        tf.concat([user * pos_item, neighbor], axis=1))

    # Negative branch, built the same way from the negative inputs.
    neg_memory = self.user_memory(self.input_neighborhoods_negative)
    neg_output = self.user_output(self.input_neighborhoods_negative)
    neg_layers = self._mem_layer(neg_query,
                                 neg_memory,
                                 neg_output,
                                 self.input_neighborhood_lengths_negative,
                                 self.config.max_neighbors)
    neighbor_negative = neg_layers[-1].output
    negative_output = self._output_module(
        tf.concat([user * neg_item, neighbor_negative], axis=1))

    # Loss and optimizer.
    self.loss = LossLayer()(self.score, negative_output)
    self._optimizer = OptimizerLayer(self.config.optimizer,
                                     clip=self.config.grad_clip,
                                     params=self.config.optimizer_params)
    self.train = self._optimizer(self.loss)

    tf.add_to_collection(GraphKeys.PREDICTION, self.score)
def encode_coordinates_alt(self, net):
    """An alternative implementation of the coordinate encoding.

    Args:
        net: a tensor of shape=[batch_size, height, width, num_features];
            all of batch_size, height and width are read from the static
            shape.

    Returns:
        a tensor of shape [batch_size, height, width,
        num_features + height + width]: `net` with one-hot row and column
        encodings appended along the last axis.
    """
    batch_size, h, w, _ = net.shape.as_list()

    # One [h, w] plane per row index: the one-hot row vector repeated
    # across every column.
    row_planes = []
    for i in range(h):
        one_hot_row = tf.contrib.layers.one_hot_encoding(
            tf.constant([i]), num_classes=h)
        row_planes.append(tf.tile(tf.reshape(one_hot_row, [h, 1]), [1, w]))
    h_loc = tf.concat([tf.expand_dims(plane, 2) for plane in row_planes], 2)

    # One [h, w] plane per column index: the one-hot column vector repeated
    # down every row.
    col_planes = []
    for i in range(w):
        one_hot_col = tf.contrib.layers.one_hot_encoding(
            tf.constant([i]), num_classes=w)
        col_planes.append(tf.tile(one_hot_col, [h, 1]))
    w_loc = tf.concat([tf.expand_dims(plane, 2) for plane in col_planes], 2)

    loc = tf.concat([h_loc, w_loc], 2)
    # Replicate the same encoding for every item in the batch.
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
def prepare_image_question_encoder(image_feat, question, hparams):
    """Prepare the encoder input and attention biases.

    The padding-based biases are computed from the concatenation of
    `image_feat` and `question` *before* any positional signal is added to
    the question.

    Args:
        image_feat: a Tensor.
        question: a Tensor.
        hparams: run hyperparameters; `pos` selects the positional signal
            ("timing" or "emb") and `max_length` is used for "emb".

    Returns:
        encoder_input: a Tensor, bottom of encoder stack
        encoder_self_attention_bias: a bias tensor for use in encoder
            self-attention
        encoder_decoder_attention_bias: the same bias tensor, for use in
            encoder-decoder attention
    """
    raw_input = tf.concat([image_feat, question], axis=1)
    padding = common_attention.embedding_to_padding(raw_input)
    padding_bias = common_attention.attention_bias_ignore_padding(padding)

    # Usual case - not a packed dataset.
    pos_kind = hparams.pos
    if pos_kind == "timing":
        question = common_attention.add_timing_signal_1d(question)
    elif pos_kind == "emb":
        question = common_attention.add_positional_embedding(
            question, hparams.max_length, "inputs_positional_embedding",
            None)

    # Rebuild the input so it carries the positional signal, if any.
    encoder_input = tf.concat([image_feat, question], axis=1)

    return (encoder_input, padding_bias, padding_bias)
def mmd_objective(z, s, sdim):
    """Compute the MMD between latent codes grouped by nuisance id.

    Notes:
        Reimplementation in tensorflow of the Variational Fair Autoencoder
        https://arxiv.org/abs/1511.00830

    Args:
        z: 2-D tensor of latent codes; its second static dimension is read.
        s: partition index for each row of `z`.
        sdim: number of partitions.

    Returns:
        With exactly two partitions, the MMD between them. Otherwise the
        sum over partitions of the MMD between that partition and all of
        `z` (the integer 0 if there are no partitions).
    """
    mmd_method = mmd_fourier
    z_dim = z.get_shape().as_list()[1]

    # STEP 1: split the samples by their nuisance id.
    z_part = tf.dynamic_partition(z, s, sdim)

    # One random row is appended to every group so that a category
    # without samples still yields a non-empty tensor.
    if len(z_part) == 2:
        group_a, group_b = z_part
        z0 = tf.concat([group_a, tf.random_normal([1, z_dim])], 0)
        z1 = tf.concat([group_b, tf.random_normal((1, z_dim))], axis=0)
        return mmd_method(z0, z1)

    # STEP 2: compare every group against the full batch and accumulate.
    mmd = 0
    for group in z_part:
        z0 = tf.concat([group, tf.random_normal([1, z_dim])], 0)
        mmd += mmd_method(z0, z)
    return mmd
def testSampleFromDiscretizedMixLogistic(self):
    """Samples should match the first component's locations.

    All mixture mass is put on the first component and the log-scales are
    very negative, so the expected sample is that component's clipped
    location.
    """
    batch = 2
    height = 4
    width = 4
    num_mixtures = 5
    seed = 42
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Assign all probability mass to the first component.
    first_component = tf.ones(spatial + [1]) * 1e8
    other_components = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([first_component, other_components], axis=-1)
    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.ones(param_shape) * -1e8
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    first_locs = locs[..., :3]
    expected_sample = tf.clip_by_value(first_locs, -1., 1.)

    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)
    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])

    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
def get_idx_map(shape):
    """Get index map for a image.

    Args:
        shape: [B, T, H, W] or [B, H, W]

    Returns:
        idx: [B, T, H, W, 2], or [B, H, W, 2]. Channel 0 holds the index
            along H and channel 1 the index along W.
    """
    s = shape
    # 1-element tensor holding the number of entries in `shape`.
    ndims = tf.shape(s)
    wdim = ndims - 1
    hdim = ndims - 2
    # `tf.concat` takes (values, axis) since TensorFlow 1.0.
    idx_shape = tf.concat([s, tf.constant([1])], axis=0)

    # The reshape targets below have one dimension fewer than `idx_shape`
    # and rely on right-aligned broadcasting when added to the zero maps.
    ones_h = tf.ones(hdim - 1, dtype='int32')
    ones_w = tf.ones(wdim - 1, dtype='int32')
    h_shape = tf.concat(
        [ones_h, tf.constant([-1]), tf.constant([1, 1])], axis=0)
    w_shape = tf.concat(
        [ones_w, tf.constant([-1]), tf.constant([1])], axis=0)

    idx_y = tf.zeros(idx_shape, dtype='float')
    idx_x = tf.zeros(idx_shape, dtype='float')

    # H and W are the second-to-last and last entries of `shape`.
    h = tf.slice(s, ndims - 2, [1])
    w = tf.slice(s, ndims - 1, [1])
    idx_y += tf.reshape(tf.to_float(tf.range(h[0])), h_shape)
    idx_x += tf.reshape(tf.to_float(tf.range(w[0])), w_shape)

    # Join along the trailing singleton axis (its index equals len(shape)).
    idx = tf.concat([idx_y, idx_x], axis=ndims[0])
    return idx
def din_fcn_shine(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
    """Combine a query with a sequence of facts through two dense layers.

    The query is projected to the facts' feature size, passed through
    `prelu`, tiled along the time axis, and combined with the facts as
    [q, f, q - f, q * f]. Two sigmoid dense layers map the result back to
    the shape of `facts`.

    Args:
        query: 2-D tensor; its last dimension is projected to the facts'
            feature size.
        facts: 3-D tensor, or a tuple of such tensors (e.g. Bi-RNN
            outputs) that are concatenated on the last axis.
        attention_size: unused; kept for interface compatibility.
        mask: unused; kept for interface compatibility.
        stag: suffix appended to the dense layers' names.
        mode: unused; kept for interface compatibility.
        softmax_stag: unused; kept for interface compatibility.
        time_major: if true, `facts` is (T, B, D) and is transposed to
            (B, T, D) first.
        return_alphas: unused; kept for interface compatibility.

    Returns:
        A tensor with the same shape as `facts` (after the optional concat
        and transpose).
    """
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN
        # outputs.
        facts = tf.concat(facts, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        facts = tf.transpose(facts, [1, 0, 2])

    # D value - hidden size of the RNN layer
    facts_size = facts.get_shape().as_list()[-1]

    query = tf.layers.dense(
        query, facts_size, activation=None, name='f1_trans_shine' + stag)
    query = prelu(query)

    # Repeat the query once per time step so it lines up with `facts`.
    queries = tf.tile(query, [1, tf.shape(facts)[1]])
    queries = tf.reshape(queries, tf.shape(facts))

    din_all = tf.concat(
        [queries, facts, queries - facts, queries * facts], axis=-1)
    d_layer_1_all = tf.layers.dense(
        din_all, facts_size, activation=tf.nn.sigmoid,
        name='f1_shine_att' + stag)
    d_layer_2_all = tf.layers.dense(
        d_layer_1_all, facts_size, activation=tf.nn.sigmoid,
        name='f2_shine_att' + stag)
    return tf.reshape(d_layer_2_all, tf.shape(facts))
def testDiscretizedMixLogisticLoss(self):
    """Loss should equal a single-logistic loss computed by hand.

    All mixture mass is put on the first component, so the expected loss
    is computed from that component's locations and log-scales only.
    """
    batch = 2
    height = 4
    width = 4
    channels = 3
    num_mixtures = 5
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Assign all probability mass to the first component.
    first_component = tf.ones(spatial + [1]) * 1e8
    other_components = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([first_component, other_components], axis=-1)
    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.random_uniform(param_shape, minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform(spatial + [channels], minval=-.9, maxval=.9)

    # Hand-computed loss for the first component.
    first_locs = locs[..., :3]
    first_log_scales = log_scales[..., :3]
    centered_labels = labels - first_locs
    inv_stdv = tf.exp(-first_log_scales)
    half_bin = 1. / 255.
    upper = inv_stdv * (centered_labels + half_bin)
    lower = inv_stdv * (centered_labels - half_bin)
    cdf_upper = tf.nn.sigmoid(upper)
    cdf_lower = tf.nn.sigmoid(lower)
    expected_loss = -tf.reduce_sum(tf.log(cdf_upper - cdf_lower), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
def embed_sequences(self, embed_sequence_batch):
    """Return sentence embeddings as a tensor with shape
    [batch_size, hidden_size * 2].

    As side effects, the forward and backward LSTM layers are stored on
    `self._forward_lstm` / `self._backward_lstm`, and the per-step hidden
    states of both directions are stored on `self._hidden_states`.

    Args:
        embed_sequence_batch: object exposing `values` (3-D) and `mask`
            (2-D) tensors.

    Returns:
        The last time step of the forward and of the backward LSTM
        outputs, concatenated on the feature axis.
    """
    forward_values = embed_sequence_batch.values
    forward_mask = embed_sequence_batch.mask
    # Since TensorFlow 1.0, `tf.reverse` takes the axes to flip rather
    # than one boolean per dimension.
    backward_values = tf.reverse(forward_values, [1])
    backward_mask = tf.reverse(forward_mask, [1])

    # Initialize LSTMs
    self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)
    self._backward_lstm = LSTM(self.hidden_size, return_sequences=True)

    # Pass input through the LSTMs
    # Shape: (batch_size, seq_length, hidden_size)
    forward_seq = self._forward_lstm(forward_values, forward_mask)
    forward_seq.set_shape((None, self.seq_length, self.hidden_size))
    backward_seq = self._backward_lstm(backward_values, backward_mask)
    backward_seq.set_shape((None, self.seq_length, self.hidden_size))

    # Stitch the outputs together --> hidden states (for computing attention)
    # The backward outputs are flipped back so both directions are aligned
    # in time. Final dimension: (batch_size, seq_length, hidden_size * 2)
    lstm_states = tf.concat(
        [forward_seq, tf.reverse(backward_seq, [1])], 2)
    self._hidden_states = SequenceBatch(lstm_states, forward_mask)

    # Stitch the final outputs together --> sequence embedding
    # Final dimension: (batch_size, hidden_size * 2)
    seq_length = tf.shape(forward_values)[1]
    last_step = [0, seq_length - 1, 0]
    one_step = [-1, 1, self.hidden_size]
    forward_final = tf.slice(forward_seq, last_step, one_step)
    backward_final = tf.slice(backward_seq, last_step, one_step)
    return tf.squeeze(tf.concat([forward_final, backward_final], 2), [1])
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian.

    Sets `self._all_scores` to the distance of every example to every
    class and `self._scores` to the distance of every example to the class
    returned for it by `self.assignments()`.

    Args:
        data: iterable of input shards.
    """
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
        all_scores = []
        shard = tf.expand_dims(shard, 0)
        for c in range(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[c, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[c, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            # One copy of the inverse covariance per example, so the
            # products below are batched over examples.
            inv_cov = tf.tile(
                tf.expand_dims(inverse, 0),
                tf.stack([self._num_examples, 1, 1]))
            diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
            # `tf.matmul` handles batched operands; `tf.batch_matmul` was
            # removed in TensorFlow 1.0.
            m_left = tf.matmul(diff, inv_cov)
            all_scores.append(tf.sqrt(tf.matmul(
                m_left, tf.transpose(diff, perm=[0, 2, 1]))))
        self._all_scores.append(tf.reshape(
            tf.concat(all_scores, 1),
            tf.stack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(self._all_scores, 0)
    assignments = tf.concat(self.assignments(), 0)
    rows = tf.to_int64(tf.range(0, self._num_examples))
    # (row, assigned class) pairs used to pick one score per example.
    indices = tf.concat(
        [tf.expand_dims(rows, 1), tf.expand_dims(assignments, 1)], 1)
    self._scores = tf.gather_nd(self._all_scores, indices)
def __init__(self, input_files, num_epochs, batch_size):
    """Build the queue-based input pipeline.

    Records are read from TFRecord files, parsed into image features,
    sentences, targets and lengths, and batched with shuffling. Every
    record holds `config.sents_per_sample` sentences that share a single
    image feature row, so that row is repeated once per sentence.

    Args:
        input_files: list of TFRecord file names.
        num_epochs: number of passes over `input_files`.
        batch_size: number of sentences per produced batch.
    """
    filename_queue = tf.train.string_input_producer(
        input_files, num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, records = reader.read(filename_queue)

    sentences_shape = (config.sents_per_sample, config.max_len)
    decoded = tf.parse_single_example(
        records,
        features={
            'image': tf.FixedLenFeature(
                (1, config.image_features_count), tf.float32),
            'text': tf.FixedLenFeature(sentences_shape, tf.int64),
            'result': tf.FixedLenFeature(sentences_shape, tf.int64),
            'len': tf.FixedLenFeature(
                (config.sents_per_sample, 1), tf.int64),
        })
    self.image = decoded['image']
    self.text = decoded['text']
    self.result = decoded['result']
    self.lens = decoded['len']

    # Repeat the image row so that each sentence has its own copy.
    self.image = tf.concat([self.image] * config.sents_per_sample, 0)

    # result requires one-hot encoding
    clamped_result = tf.minimum(self.result, config.output_words_count)
    per_sentence = tf.unstack(
        clamped_result, num=config.sents_per_sample, axis=0)
    self.categorical_result = tf.stack(
        [self.to_categorical(sentence) for sentence in per_sentence],
        axis=0)

    # The capacity is derived from the requested batch size so that the
    # queue can always hold `min_after_dequeue` plus one full batch.
    (self.image_input, self.text_input,
     self.result_input, self.lens_input) = tf.train.shuffle_batch(
         [self.image, self.text, self.categorical_result, self.lens],
         batch_size=batch_size,
         capacity=256 + batch_size,
         min_after_dequeue=128,
         enqueue_many=True)
def __init__(self, session, input_pipeline):
    """Build the model graph, its loss, the training op and a saver.

    Args:
        session: session object stored on `self.session`.
        input_pipeline: object exposing `text_input`, `image_input`,
            `result_input` and `lens_input` tensors.
    """
    self.session = session
    self.input_pipeline = input_pipeline

    # One embedded input tensor per time step.
    text_embeddings = weight_init(config.words_count + 2, config.hidden_count)
    embedded = tf.nn.embedding_lookup(
        text_embeddings, input_pipeline.text_input)
    inputs = tf.unstack(embedded, num=config.max_len, axis=1)

    # The initial state is zeros followed by the projected image features.
    w_image = weight_init(config.image_features_count, config.hidden_count)
    b_image = bias_init([config.hidden_count])
    image_transform = tf.matmul(input_pipeline.image_input, w_image) + b_image
    hidden_start = tf.concat(
        [tf.zeros_like(image_transform), image_transform], 1)

    # Flatten the lengths to one value per sample. This is a no-op for a
    # [batch] tensor, and keeps the division below element-wise when the
    # pipeline delivers [batch, 1].
    lengths = tf.reshape(input_pipeline.lens_input, [-1])

    cell = WordCell(config.hidden_count, config.output_words_count + 1)
    probs_list, self.hidden = tf.nn.static_rnn(
        cell=cell, inputs=inputs, initial_state=hidden_start,
        sequence_length=lengths)
    self.probs = tf.stack(probs_list, axis=1)

    float_lens = tf.cast(lengths, tf.float32)
    sample_losses = tf.reduce_sum(
        self.probs * input_pipeline.result_input, [1, 2]) / float_lens
    self.loss = -tf.reduce_mean(sample_losses)
    self.train_task = tf.train.AdamOptimizer(1e-4).minimize(self.loss)
    self.loss_summary = tf.summary.scalar('loss', self.loss)
    self.saver = tf.train.Saver()
def _marginal_hidden_probs(self):
    """Compute the marginal hidden-state probabilities for every step.

    The initial log-probabilities are broadcast to the batch shape and
    then repeatedly combined with the transition log-probabilities via
    `_log_vector_matrix`, once per additional step.

    Returns:
        A tensor laid out as num_steps, batch_shape, num_states.
    """
    target_shape = tf.concat(
        [self.batch_shape_tensor(), [self._num_states]], axis=0)
    # initial_log_probs :: batch_shape num_states
    initial_log_probs = tf.broadcast_to(self._log_init, target_shape)

    if not self._num_steps > 1:
        # Single step: only a leading step axis is needed.
        return tf.exp(initial_log_probs[tf.newaxis, ...])

    transition_log_probs = self._log_trans

    def forward_step(log_probs, _):
        return _log_vector_matrix(log_probs, transition_log_probs)

    # The scanned values are ignored; they only fix the number of steps.
    dummy_index = tf.zeros(self._num_steps - 1, dtype=tf.float32)
    later_log_probs = tf.scan(forward_step, dummy_index,
                              initializer=initial_log_probs,
                              name="forward_log_probs")
    forward_log_probs = tf.concat(
        [[initial_log_probs], later_log_probs], axis=0)

    # returns :: num_steps batch_shape num_states
    return tf.exp(forward_log_probs)
def _RunAndVerifyGradientsRandom(self, use_gpu):
    """Check concat gradients for random rank-5 inputs.

    A random number of tensors with a random size along a random axis are
    concatenated. The gradients of the concatenation with respect to its
    inputs, concatenated again along the same axis, must equal the
    gradient that was fed in.

    Args:
        use_gpu: forwarded to `self.test_session`.
    """
    # Random dims of rank 5
    input_shape = np.random.randint(1, 5, size=5)
    # Random number of tensors
    num_tensors = np.random.randint(1, 10)
    # Random dim to concat on
    concat_dim = np.random.randint(5)
    concat_dim_sizes = np.random.randint(1, 5, size=num_tensors)

    def shape_with(dim_size):
        # Work on a copy so `input_shape` itself is never mutated.
        shape = input_shape.copy()
        shape[concat_dim] = dim_size
        return shape

    with self.test_session(use_gpu=use_gpu):
        inp_tensors = []
        for dim_size in concat_dim_sizes:
            values = np.random.rand(*shape_with(dim_size)).astype("f")
            inp_tensors.append(tf.constant(values))
        c = tf.concat(inp_tensors, concat_dim)

        output_shape = shape_with(concat_dim_sizes.sum())
        grad_inp = np.random.rand(*output_shape).astype("f")
        grad_tensor = tf.constant(grad_inp)
        grad = tf.gradients([c], inp_tensors, [grad_tensor])
        concated_grad = tf.concat(grad, concat_dim)
        result = concated_grad.eval()

    self.assertAllEqual(result, grad_inp)
def _add_gtboxes_as_first_stage_proposals(self, first_stage_proposals, first_stage_scores, gtboxes):
    """Append jittered ground-truth boxes to the first-stage proposals.

    Args:
        first_stage_proposals: proposal boxes; the new boxes are appended
            along axis 0.
        first_stage_scores: 1-D scores matching `first_stage_proposals`.
        gtboxes: 2-D tensor whose columns 0-4 are read. Columns 2, 3 and 4
            are treated as width, height and angle.

    Returns:
        `(all_boxes, all_scores)`: the proposals followed by the selected
        jittered boxes, and the scores followed by 0.95 for every added
        box.
    """
    def _offset(values):
        # NOTE(review): a zero-mean normal sample minus 0.5 gives an
        # offset that is negative on average - confirm a uniform sample
        # was not intended.
        return (tf.random_normal(shape=tf.shape(values)) - 0.5)*0.1*values

    # 1. jitter gtboxes
    widths = gtboxes[:, 2]
    heights = gtboxes[:, 3]
    angles = gtboxes[:, 4]

    height_shift = _offset(heights)
    width_shift = _offset(widths)
    angle_shift = _offset(angles)

    heights = heights + height_shift
    widths = widths + width_shift
    angles = angles + angle_shift

    jittered_columns = [gtboxes[:, 0], gtboxes[:, 1], widths, heights, angles]
    new_boxes = tf.transpose(tf.stack(jittered_columns, axis=0))

    # 2. get needed added gtboxes
    max_added = tf.cast(
        cfgs.FAST_RCNN_MINIBATCH_SIZE*cfgs.FAST_RCNN_POSITIVE_RATE*0.5,
        tf.int32)
    num_needed_add = tf.minimum(max_added, tf.shape(gtboxes)[0])
    shuffled = tf.random_shuffle(
        tf.range(start=0, limit=tf.shape(new_boxes)[0]))
    chosen = tf.slice(shuffled, begin=[0], size=[num_needed_add])
    added_boxes = tf.gather(new_boxes, chosen)

    # 3. add them
    added_scores = tf.ones(shape=[tf.shape(added_boxes)[0]])*0.95
    all_boxes = tf.concat([first_stage_proposals, added_boxes], axis=0)
    all_scores = tf.concat([first_stage_scores, added_scores], axis=0)
    return all_boxes, all_scores
def rotate(first, second, offset=None):
    """Slide a window from `first` into `second`, concatenating each state.

    Starting from the list `first`, each step drops the oldest element
    and appends the next element of `second`. For the initial window and
    after every step, the first `offset` elements (all when `offset` is
    None) are concatenated along axis 3.

    Args:
        first: list of tensors forming the initial window.
        second: iterable of tensors shifted in one at a time.
        offset: optional number of leading window elements to concatenate.

    Returns:
        A list with `1 + len(second)` concatenated tensors.
    """
    def _merge(window):
        return tf.concat(window[:offset], axis=3)

    window = first
    rotations = [_merge(window)]
    for incoming in second:
        window = window[1:] + [incoming]
        rotations.append(_merge(window))
    return rotations
def multilevel_roi_align(features, rcnn_boxes, resolution):
    """Crop RoI features from the FPN level assigned to each box.

    Args:
        features ([tf.Tensor]): 4 FPN feature level 2-5
        rcnn_boxes (tf.Tensor): nx4 boxes
        resolution (int): output spatial resolution

    Returns:
        NxC x res x res
    """
    assert len(features) == 4, features

    # Reassign rcnn_boxes to levels
    level_ids, level_boxes = fpn_map_rois_to_levels(rcnn_boxes)

    # Crop patches from corresponding levels
    per_level_rois = []
    for i, (boxes, featuremap) in enumerate(zip(level_boxes, features)):
        with tf.name_scope('roi_level{}'.format(i + 2)):
            # Scale the boxes by the inverse of this level's anchor stride.
            scale = 1.0 / cfg.FPN.ANCHOR_STRIDES[i]
            boxes_on_featuremap = boxes * scale
            per_level_rois.append(
                roi_align(featuremap, boxes_on_featuremap, resolution))
    all_rois = tf.concat(per_level_rois, axis=0)  # NCHW

    # Unshuffle to the original order, to match the original samples
    level_id_perm = tf.concat(level_ids, axis=0)  # A permutation of 1~N
    restore_order = tf.invert_permutation(level_id_perm)
    return tf.gather(all_rois, restore_order)
def make_multivariate_mixture(batch_shape, num_components, event_shape, use_static_graph, batch_shape_tensor=None):
    """Create a random mixture of diagonal multivariate normals.

    Args:
        batch_shape: static batch shape, used to set static shapes.
        num_components: number of mixture components.
        event_shape: event shape of every component.
        use_static_graph: forwarded to `tfd.Mixture`.
        batch_shape_tensor: optional batch shape used to build the random
            tensors; defaults to `batch_shape`.

    Returns:
        A `tfd.Mixture` with a `tfd.Categorical` over `num_components`
        `tfd.MultivariateNormalDiag` components.
    """
    if batch_shape_tensor is None:
        batch_shape_tensor = batch_shape
    batch_shape_tensor = tf.convert_to_tensor(batch_shape_tensor, tf.int32)

    static_batch = tf.TensorShape(batch_shape)
    static_batch_and_event_shape = static_batch.concatenate(event_shape)

    logits_shape = tf.concat((batch_shape_tensor, [num_components]), 0)
    logits = tf.random_uniform(logits_shape, -1, 1, dtype=tf.float32) - 50.
    logits.set_shape(static_batch.concatenate(num_components))

    event_shape = tf.convert_to_tensor(event_shape, tf.int32)
    batch_and_event_shape = tf.concat((batch_shape_tensor, event_shape), 0)

    components = []
    for _ in range(num_components):
        loc = tf.random_normal(batch_and_event_shape)
        scale_diag = 10 * tf.random_uniform(batch_and_event_shape)
        # Restore the static shapes lost by sampling with a tensor shape.
        loc.set_shape(static_batch_and_event_shape)
        scale_diag.set_shape(static_batch_and_event_shape)
        components.append(
            tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale_diag))

    cat = tfd.Categorical(logits, dtype=tf.int32)
    return tfd.Mixture(cat, components, use_static_graph=use_static_graph)
def SequenceToImageAndDiff(images):
    """Convert image sequence batch into image and diff batch.

    Each image pair is converted to the first image and their diff.
    Batch size will increase if sequence length is larger than 2.

    Args:
        images: Image sequence with shape
            [batch_size, seq_len, image_size, image_size, channel]

    Returns:
        the list of (image, diff) tuples with shape
            [batch_size2, image_size, image_size, channel]. image_sizes are
            [32, 64, 128, 256].
    """
    frames = tf.unstack(images, axis=1)
    image_diff_list = []
    for size in [32, 64, 128, 256]:
        resized = [tf.image.resize_images(frame, [size, size])
                   for frame in frames]
        # Difference between every frame and the frame before it.
        diffs = [later - earlier
                 for earlier, later in zip(resized[:-1], resized[1:])]
        first_images = tf.concat(axis=0, values=resized[:-1])
        stacked_diffs = tf.concat(axis=0, values=diffs)
        image_diff_list.append((first_images, stacked_diffs))
    return image_diff_list
def test_get_predictions_with_feature_maps_of_dynamic_shape(self):
    """Predictor output shapes follow the fed spatial size.

    The feature-map placeholder has unknown height and width; a 32x32 map
    is fed and the shapes of the box encodings and the objectness
    predictions are checked.
    """
    image_features = tf.placeholder(
        dtype=tf.float32, shape=[4, None, None, 64])
    conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
        is_training=False,
        num_classes=0,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        depth=32,
        num_layers_before_predictor=1,
        box_code_size=4)
    box_predictions = conv_box_predictor.predict(
        [image_features],
        num_predictions_per_location=[5],
        scope='BoxPredictor')
    box_encodings = tf.concat(
        box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
    objectness_predictions = tf.concat(
        box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
        axis=1)
    init_op = tf.global_variables_initializer()

    resolution = 32
    # Five predictions per spatial location.
    expected_num_anchors = resolution*resolution*5
    fed_features = {
        image_features: np.random.rand(4, resolution, resolution, 64)}
    with self.test_session() as sess:
        sess.run(init_op)
        shapes = sess.run(
            [tf.shape(box_encodings), tf.shape(objectness_predictions)],
            feed_dict=fed_features)
        (box_encodings_shape, objectness_predictions_shape) = shapes
        self.assertAllEqual(box_encodings_shape,
                            [4, expected_num_anchors, 4])
        self.assertAllEqual(objectness_predictions_shape,
                            [4, expected_num_anchors, 1])
def build_lstm_forward(H, x, googlenet, phase, reuse):
    """Build the forward pass that predicts boxes and confidences.

    Args:
        H: hyperparameter dict; the keys under `H['arch']` are read.
        x: input tensor; a constant mean of 117 is subtracted from it.
        googlenet: passed through to `googlenet_load.model`.
        phase: dropout is only applied when this equals 'train'.
        reuse: reuse flag for the 'decoder' variable scope.

    Returns:
        pred_boxes: [outer_size, rnn_len, 4]
        pred_logits: [outer_size, rnn_len, 2]
        pred_confidences: softmax of `pred_logits`, same shape.
        Here outer_size = grid_width * grid_height * batch_size.
    """
    arch = H['arch']
    grid_size = arch['grid_width'] * arch['grid_height']
    outer_size = grid_size * arch['batch_size']
    input_mean = 117.
    x -= input_mean
    Z = googlenet_load.model(x, googlenet, H)
    with tf.variable_scope('decoder', reuse=reuse):
        scale_down = 0.01
        if arch['early_dropout'] and phase == 'train':
            Z = tf.nn.dropout(Z, 0.5)
        lstm_input = tf.reshape(
            Z * scale_down, (arch['batch_size'] * grid_size, 1024))

        lstm_outputs = build_lstm_inner(lstm_input, H)

        # Each LSTM step has its own box and confidence projection.
        pred_boxes = []
        pred_logits = []
        for i in range(arch['rnn_len']):
            output = lstm_outputs[i]
            if arch['late_dropout'] and phase == 'train':
                output = tf.nn.dropout(output, 0.5)
            box_weights = tf.get_variable(
                'box_ip%d' % i,
                shape=(arch['lstm_size'], 4),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            conf_weights = tf.get_variable(
                'conf_ip%d' % i,
                shape=(arch['lstm_size'], 2),
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            pred_boxes.append(tf.reshape(
                tf.matmul(output, box_weights) * 50, [outer_size, 1, 4]))
            pred_logits.append(tf.reshape(
                tf.matmul(output, conf_weights), [outer_size, 1, 2]))

        # `tf.concat` takes (values, axis) since TensorFlow 1.0.
        pred_boxes = tf.concat(pred_boxes, 1)
        pred_logits = tf.concat(pred_logits, 1)
        # Flatten to 2-D for the softmax, then restore the step axis.
        pred_logits_squash = tf.reshape(
            pred_logits, [outer_size * arch['rnn_len'], 2])
        pred_confidences_squash = tf.nn.softmax(pred_logits_squash)
        pred_confidences = tf.reshape(
            pred_confidences_squash, [outer_size, arch['rnn_len'], 2])

    return pred_boxes, pred_logits, pred_confidences
def test_get_correct_box_encoding_and_class_prediction_shapes(self):
    """`RfcnBoxPredictor` outputs have the expected shapes.

    Four 8x8x64 feature maps with two proposal boxes each are fed; the
    shapes of the box encodings and the class predictions are checked.
    """
    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
    proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
    rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
        is_training=False,
        num_classes=2,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        num_spatial_bins=[3, 3],
        depth=4,
        crop_size=[12, 12],
        box_code_size=4)
    predictions = rfcn_box_predictor.predict(
        [image_features],
        num_predictions_per_location=[1],
        scope='BoxPredictor',
        proposal_boxes=proposal_boxes)
    box_encodings = tf.concat(
        predictions[box_predictor.BOX_ENCODINGS], axis=1)
    class_predictions_with_background = tf.concat(
        predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
        axis=1)

    init_op = tf.global_variables_initializer()
    shape_ops = [tf.shape(box_encodings),
                 tf.shape(class_predictions_with_background)]
    with self.test_session() as sess:
        sess.run(init_op)
        (box_encodings_shape, class_predictions_shape) = sess.run(shape_ops)
        self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
        self.assertAllEqual(class_predictions_shape, [8, 1, 3])
def loss_layer(self, project_logits, lengths, name=None):
    """Compute the mean negative CRF log-likelihood.

    One extra tag (index `num_tags`) and one extra leading step are added.
    The extra step strongly favours the extra tag, and the extra tag gets
    a large negative logit on every real step. The targets are prefixed
    with the extra tag accordingly. `self.trans` is replaced by the
    transition parameters returned from `crf_log_likelihood`.

    Args:
        project_logits: logits concatenated with a
            [batch_size, num_steps, 1] padding column on the last axis.
        lengths: sequence lengths; one is added for the extra step.
        name: optional variable scope name, "crf_loss" when not given.

    Returns:
        Scalar tensor: the mean of the negated log-likelihoods.
    """
    with tf.variable_scope(name or "crf_loss"):
        small = -1000.0
        extended_tags = self.num_tags + 1

        # Logits of the extra first step: every real tag is penalised and
        # the extra tag gets zero.
        start_logits = tf.concat(
            [small * tf.ones(shape=[self.batch_size, 1, self.num_tags]),
             tf.zeros(shape=[self.batch_size, 1, 1])],
            axis=-1)
        # The extra tag is penalised on every real step.
        pad_logits = tf.cast(
            small * tf.ones([self.batch_size, self.num_steps, 1]),
            tf.float32)
        padded_logits = tf.concat([project_logits, pad_logits], axis=-1)
        logits = tf.concat([start_logits, padded_logits], axis=1)

        start_targets = tf.cast(
            self.num_tags * tf.ones([self.batch_size, 1]), tf.int32)
        targets = tf.concat([start_targets, self.targets], axis=-1)

        self.trans = tf.get_variable(
            "transitions",
            shape=[extended_tags, extended_tags],
            initializer=self.initializer)
        log_likelihood, self.trans = crf_log_likelihood(
            inputs=logits,
            tag_indices=targets,
            transition_params=self.trans,
            sequence_lengths=lengths + 1)
        return tf.reduce_mean(-log_likelihood)
def one_hot_matrix(tensor_in, num_classes, on_value=1.0, off_value=0.0):
    """Encodes indices from given tensor as one-hot tensor.

    TODO(ilblackdragon): Ideally implementation should be part of
    TensorFlow with Eigen-native operation.

    Args:
        tensor_in: Input tensor of shape [N1, N2].
        num_classes: Number of classes to expand index into.
        on_value: Tensor or float, value to fill-in given index.
        off_value: Tensor or float, value to fill-in everything else.

    Returns:
        Tensor of shape [N1, N2, num_classes] with 1.0 for each id in
        original tensor.
    """
    tensor_in = tf.convert_to_tensor(tensor_in)
    # Flatten the ids into a column: one row per input element.
    sparse_values = tf.to_int64(tf.reshape(tensor_in, [-1, 1]))
    size = tf.shape(sparse_values)[0]
    dims = tf.shape(tensor_in)
    indices = tf.to_int64(tf.reshape(tf.range(0, size), [-1, 1]))
    # (row, class id) pairs; `tf.concat` takes (values, axis) since
    # TensorFlow 1.0.
    indices_values = tf.concat([indices, sparse_values], 1)
    outshape = tf.to_int64(expand_concat(0, [size, num_classes]))
    one_hot_vector = tf.sparse_to_dense(
        indices_values, outshape, on_value, off_value)
    # Restore the input's leading dimensions.
    ret = tf.reshape(one_hot_vector, tf.concat([dims, [num_classes]], 0))
    ret.set_shape(tensor_in.get_shape().concatenate(num_classes))
    return ret
def random_shift(v):
    """Wrap-pad `v` along axes 0 and 1, then take a random crop.

    When `random_shift_y` / `random_shift_x` are non-zero, that many
    trailing rows / columns are prepended and that many leading rows /
    columns are appended. The result is cropped to
    [resize[0], resize[1], size[2]].
    """
    if random_shift_y:
        tail_rows = v[-random_shift_y:]
        head_rows = v[:random_shift_y]
        v = tf.concat([tail_rows, v, head_rows], 0)
    if random_shift_x:
        tail_cols = v[:, -random_shift_x:]
        head_cols = v[:, :random_shift_x]
        v = tf.concat([tail_cols, v, head_cols], 1)
    crop_shape = [resize[0], resize[1], size[2]]
    return tf.random_crop(v, crop_shape)
def __call__(self, inputs, seq_len, keep_prob=1.0, is_train=None, concat_layers=True):
    """Run the stacked bidirectional GRU layers over `inputs`.

    The input is transposed with perm [1, 0, 2] so the RNNs run with
    `time_major=True`, and the result is transposed back before returning.

    Args:
        inputs: 3-D input tensor.
        seq_len: sequence lengths used to reverse the backward inputs and
            outputs.
        keep_prob: unused here.
        is_train: unused here.
        concat_layers: if true, the outputs of all layers are concatenated
            on the feature axis; otherwise only the last layer's output is
            returned.

    Returns:
        The (concatenated) layer outputs, in the layout of `inputs`.
    """
    layer_outputs = [tf.transpose(inputs, [1, 0, 2])]
    for layer in range(self.num_layers):
        gru_fw, gru_bw = self.grus[layer]
        init_fw, init_bw = self.inits[layer]
        mask_fw, mask_bw = self.dropout_mask[layer]
        previous = layer_outputs[-1]

        with tf.variable_scope('fw_{}'.format(layer), reuse=tf.AUTO_REUSE):
            with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
                out_fw, _ = tf.nn.dynamic_rnn(
                    cell=gru_fw,
                    inputs=previous * mask_fw,
                    time_major=True,
                    initial_state=tuple(tf.unstack(init_fw, axis=0)))

        with tf.variable_scope('bw_{}'.format(layer), reuse=tf.AUTO_REUSE):
            with tf.variable_scope('cudnn_gru', reuse=tf.AUTO_REUSE):
                # Reverse each sequence, run the RNN, then undo the
                # reversal so both directions are aligned in time.
                inputs_bw = tf.reverse_sequence(
                    previous * mask_bw,
                    seq_lengths=seq_len, seq_dim=0, batch_dim=1)
                out_bw, _ = tf.nn.dynamic_rnn(
                    cell=gru_bw,
                    inputs=inputs_bw,
                    time_major=True,
                    initial_state=tuple(tf.unstack(init_bw, axis=0)))
                out_bw = tf.reverse_sequence(
                    out_bw, seq_lengths=seq_len, seq_dim=0, batch_dim=1)

        layer_outputs.append(tf.concat([out_fw, out_bw], axis=2))

    if concat_layers:
        res = tf.concat(layer_outputs[1:], axis=2)
    else:
        res = layer_outputs[-1]
    return tf.transpose(res, [1, 0, 2])
def get_model(name):
    """Build the model graph and wrap it in a `Model`.

    Three input planes are concatenated on axis 3 and passed through a
    first convolution pair followed by five residual-style updates. The
    tanh of the result is exponentiated, weighted by `self_ability` and
    normalised over axes 1-3 to give the prediction.

    Args:
        name: prefix for the names of the created ops and layers.

    Returns:
        A `Model` built from the three input placeholders, the label
        placeholder, the per-sample loss and the normalised prediction.
    """
    name = functools.partial('{}-{}'.format, name)

    self_pos = tf.placeholder(Config.dtype, Config.data_shape, name='self_pos')
    self_ability = tf.placeholder(Config.dtype, Config.data_shape, name='self_ability')
    enemy_pos = tf.placeholder(Config.dtype, Config.data_shape, name='enemy_pos')
    input_label = tf.placeholder(Config.dtype, Config.label_shape, name='input_label')

    # `tf.concat` takes (values, axis) since TensorFlow 1.0.
    x = tf.concat([self_pos, self_ability, enemy_pos], 3, name=name('input_concat'))
    y = input_label

    nl = tf.nn.tanh

    def conv_pip(name, x):
        # Two 3x3 convolutions: widen to twice the channel count, then
        # return to the original channel count.
        name = functools.partial('{}_{}'.format, name)
        x = conv2d(name('0'), x, Config.data_shape[3]*2, kernel=3, stride=1, nl=nl)
        x = conv2d(name('1'), x, Config.data_shape[3], kernel=3, stride=1, nl=nl)
        return x

    pred = conv_pip(name('conv0'), x)
    for layer in range(5):
        # Every update sees the running prediction next to the raw input.
        pred_branch = tf.concat([pred, x], 3, name=name('concate%d' % layer))
        pred += conv_pip(name('conv%d' % (layer+1)), pred_branch)

    x = tf.tanh(pred, name=name('control_tanh'))

    # `tf.mul` was renamed to `tf.multiply` in TensorFlow 1.0.
    z = tf.multiply(tf.exp(x), self_ability)
    z_sum = tf.reduce_sum(z, axis=[1, 2, 3], name=name('partition_function'))  # partition function

    # another formula of y*logy
    loss = -tf.reduce_sum(tf.multiply(x, y), axis=[1, 2, 3]) + tf.log(z_sum)
    z_sum = tf.reshape(z_sum, [-1, 1, 1, 1])
    pred = tf.div(z, z_sum, name=name('predict'))
    return Model([self_pos, self_ability, enemy_pos], input_label, loss, pred, debug=z)
def next_frame(self, frames, actions, rewards, target_frame, internal_states, video_extra):
    """Predict the next frame plus optional reward, policy and value heads.

    The input frames are concatenated on the last axis, embedded with a dense
    layer, down-strided ``hparams.num_compress_steps`` times, passed through
    ``self.inject_latent`` and ``self.middle_network``, then up-convolved with
    additive skip connections and cropped to the input's spatial size.

    Args:
        frames: list of frame tensors, concatenated on the last axis.
        actions: sequence of actions; only the last element is used.
        rewards: unused (deleted immediately).
        target_frame: forwarded to ``self.inject_latent``.
        internal_states: may be None; forwarded to ``self.middle_network`` and,
            when ``hparams.concat_internal_states`` is set, partly concatenated
            to the input and updated early.
        video_extra: unused (deleted immediately).

    Returns:
        Tuple ``(x, reward_pred, policy_pred, value_pred, extra_loss,
        internal_states)``. ``reward_pred`` is None unless
        ``self.has_rewards``; ``policy_pred`` and ``value_pred`` are None
        unless ``self.has_actions``.
    """
    del rewards, video_extra
    hparams = self.hparams
    filters = hparams.hidden_size
    kernel2 = (4, 4)
    action = actions[-1]

    # Stack the inputs.
    if internal_states is not None and hparams.concat_internal_states:
        # Use the first part of the first internal state if asked to concatenate.
        batch_size = common_layers.shape_list(frames[0])[0]
        internal_state = internal_states[0][0][:batch_size, :, :, :]
        stacked_frames = tf.concat(frames + [internal_state], axis=-1)
    else:
        stacked_frames = tf.concat(frames, axis=-1)
    inputs_shape = common_layers.shape_list(stacked_frames)

    # Update internal states early if requested.
    if hparams.concat_internal_states:
        internal_states = self.update_internal_states_early(
            internal_states, frames)

    # Using non-zero bias initializer below for edge cases of uniform inputs.
    x = tf.layers.dense(
        stacked_frames, filters, name="inputs_embed",
        bias_initializer=tf.random_normal_initializer(stddev=0.01))
    x = common_attention.add_timing_signal_nd(x)

    # Down-stride.
    # NOTE: the list starts with the embedded input and the first iteration
    # appends that same tensor again, so it ends up with
    # num_compress_steps + 1 entries; only the first num_compress_steps of the
    # reversed list are consumed by the up-convolution loop below.
    layer_inputs = [x]
    for i in range(hparams.num_compress_steps):
        with tf.variable_scope("downstride%d" % i):
            layer_inputs.append(x)
            x = tf.nn.dropout(x, 1.0 - self.hparams.dropout)
            x = common_layers.make_even_size(x)
            # Filter count doubles only on the first filter_double_steps steps.
            if i < hparams.filter_double_steps:
                filters *= 2
            x = common_attention.add_timing_signal_nd(x)
            x = tf.layers.conv2d(x, filters, kernel2, activation=common_layers.belu, strides=(2, 2), padding="SAME")
            x = common_layers.layer_norm(x)

    # Policy and value heads are read off the flattened bottleneck.
    if self.has_actions:
        with tf.variable_scope("policy"):
            x_flat = tf.layers.flatten(x)
            policy_pred = tf.layers.dense(x_flat, self.hparams.problem.num_actions)
            value_pred = tf.layers.dense(x_flat, 1)
            value_pred = tf.squeeze(value_pred, axis=-1)
    else:
        policy_pred, value_pred = None, None

    # Add embedded action if present.
    if self.has_actions:
        x = common_video.inject_additional_input(
            x, action, "action_enc", hparams.action_injection)

    # Inject latent if present. Only for stochastic models.
    x, extra_loss = self.inject_latent(x, frames, target_frame, action)

    # Spatial mean of the bottleneck; reused for reward prediction below.
    x_mid = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    x, internal_states = self.middle_network(x, internal_states)

    # Up-convolve.
    layer_inputs = list(reversed(layer_inputs))
    for i in range(hparams.num_compress_steps):
        with tf.variable_scope("upstride%d" % i):
            x = tf.nn.dropout(x, 1.0 - self.hparams.dropout)
            if self.has_actions:
                x = common_video.inject_additional_input(
                    x, action, "action_enc", hparams.action_injection)
            # Mirror of the doubling schedule used while down-striding.
            if i >= hparams.num_compress_steps - hparams.filter_double_steps:
                filters //= 2
            x = tf.layers.conv2d_transpose(
                x, filters, kernel2, activation=common_layers.belu,
                strides=(2, 2), padding="SAME")
            y = layer_inputs[i]
            shape = common_layers.shape_list(y)
            # Crop to the skip connection's spatial size before adding it.
            x = x[:, :shape[1], :shape[2], :]
            x = common_layers.layer_norm(x + y)
            x = common_attention.add_timing_signal_nd(x)

    # Cut down to original size.
    x = x[:, :inputs_shape[1], :inputs_shape[2], :]
    x_fin = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
    if self.is_per_pixel_softmax:
        x = tf.layers.dense(x, hparams.problem.num_channels * 256, name="logits")
    else:
        x = tf.layers.dense(x, hparams.problem.num_channels, name="logits")

    reward_pred = None
    if self.has_rewards:
        # Reward prediction based on middle and final logits.
        reward_pred = tf.concat([x_mid, x_fin], axis=-1)
        reward_pred = tf.nn.relu(tf.layers.dense(
            reward_pred, 128, name="reward_pred"))
        reward_pred = tf.squeeze(reward_pred, axis=1)  # Remove extra dims
        reward_pred = tf.squeeze(reward_pred, axis=1)  # Remove extra dims

    return x, reward_pred, policy_pred, value_pred, extra_loss, internal_states
def concatenate(arrs, axis=0):
    """Join the tensors in ``arrs`` along ``axis`` (default 0) via ``tf.concat``."""
    joined = tf.concat(values=arrs, axis=axis)
    return joined
def conv1d_layer_sentence_representation(sent_wordembeddings):
    """Apply multiple conv1d filters to extract sentence representations.

    One convolution is built per filter width in
    ``1..FLAGS.max_filter_length``. Each is followed by bias, ReLU, a max-pool
    over the full output width and local response normalisation. The per-width
    results are summed when ``FLAGS.handle_filter_output == "sum"`` and
    concatenated on axis 1 otherwise.

    Args:
        sent_wordembeddings: [None, max_sent_length, wordembed_size]

    Returns:
        sent_representations: [None, sentembed_size]
    """
    if FLAGS.handle_filter_output == "sum":
        output_channel = FLAGS.sentembed_size
    else:  # concat
        # Floor division keeps the channel count an int on Python 3 as well;
        # with true division the divisibility guard below could never fire.
        output_channel = FLAGS.sentembed_size // FLAGS.max_filter_length
        if output_channel * FLAGS.max_filter_length != FLAGS.sentembed_size:
            print(
                "Error: Make sure (output_channel * FLAGS.max_filter_length) is equal to FLAGS.sentembed_size."
            )
            exit(0)

    representation_from_filters = []
    for filterwidth in range(1, FLAGS.max_filter_length + 1):
        with tf.variable_scope("Conv1D_%d" % filterwidth):
            # Convolution
            conv_filter = variable_on_cpu(
                "conv_filter_%d" % filterwidth,
                [filterwidth, FLAGS.wordembed_size, output_channel],
                tf.truncated_normal_initializer())
            # [None, out_width=(max_sent_length-(filterwidth-1)), output_channel]
            conv = tf.nn.conv1d(
                sent_wordembeddings, conv_filter, 1, padding='VALID')
            conv_biases = variable_on_cpu(
                "conv_biases_%d" % filterwidth, [output_channel],
                tf.constant_initializer(0.0))
            pre_activation = tf.nn.bias_add(conv, conv_biases)
            conv = tf.nn.relu(pre_activation)  # [None, out_width, output_channel]

            # Max pool: reshape conv so tf.nn.max_pool can be used.
            # [None, out_height:1, out_width, output_channel]
            conv_reshaped = tf.expand_dims(conv, 1)
            out_height = conv_reshaped.get_shape()[1].value
            out_width = conv_reshaped.get_shape()[2].value
            # The pooling window covers the whole feature map.
            maxpool = tf.nn.max_pool(
                conv_reshaped, [1, out_height, out_width, 1], [1, 1, 1, 1],
                padding='VALID')  # [None, 1, 1, output_channel]

            # Local Response Normalization (settings from cifar10)
            maxpool_norm = tf.nn.lrn(
                maxpool, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

            # Get back to original dimension
            maxpool_sqz = tf.squeeze(maxpool_norm, [1, 2])  # [None, output_channel]
            representation_from_filters.append(maxpool_sqz)

    with tf.variable_scope("FinalOut"):
        if FLAGS.handle_filter_output == "sum":
            final_representation = tf.add_n(representation_from_filters)
        else:
            # Keyword form matches the TF >= 1.0 signature tf.concat(values, axis).
            final_representation = tf.concat(
                axis=1, values=representation_from_filters)

    return final_representation
def main():
    """Create the model and start the training.

    Builds the input pipeline, the multi-GPU segmentation graph, the softmax
    and L2 losses, two momentum optimizers (one for variables whose name
    contains 'block5' at 10x the learning rate, one for the rest), the
    TensorBoard summaries, and then runs ``args.num_steps`` training steps,
    saving checkpoints periodically.
    """
    # Read CL arguments and snapshot the arguments into text file.
    args = get_arguments()
    utils.general.snapshot_arg(args)

    # The segmentation network is stride 8 by default.
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    innet_size = (int(math.ceil(h / 8)), int(math.ceil(w / 8)))

    # Initialize the random seed.
    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # current step
    step_ph = tf.placeholder(dtype=tf.float32, shape=())

    # Load the reader.
    with tf.device('/cpu:0'):
        with tf.name_scope('create_inputs'):
            reader = ImageReader(args.data_dir, args.data_list, input_size, args.random_scale, args.random_mirror, args.random_crop, args.ignore_label, IMG_MEAN)
            image_batch, label_batch = reader.dequeue(args.batch_size)

    # Allocate data evenly to each gpu.
    images_mgpu = nn_mgpu.split(image_batch, args.num_gpu)
    labels_mgpu = nn_mgpu.split(label_batch, args.num_gpu)

    # Create network and output predictions.
    outputs_mgpu = model(images_mgpu, args.num_classes, args.is_training, args.use_global_status)

    # Grab variable names which should be restored from checkpoints.
    # A variable is skipped only when its name contains 'block5' AND
    # args.not_restore_classifier is set.
    restore_var = [
        v for v in tf.global_variables()
        if 'block5' not in v.name or not args.not_restore_classifier
    ]

    # Collect losses from each gpu.
    mean_losses = []
    mean_l2_losses = []
    for outputs, lab in zip(outputs_mgpu, labels_mgpu):
        with tf.device(lab.device):
            # Shrink labels to the size of the network output.
            lab = tf.cast(lab, dtype=tf.float32)
            lab = tf.image.resize_nearest_neighbor(lab, innet_size, name='label_shrink')
            lab = tf.reshape(lab, [-1, ])

            # Ignore the location where the label value is larger than args.num_classes.
            not_ignore_pixel = tf.less_equal(lab, args.num_classes - 1)

            # Extract the indices of pixel where the gradients are propagated.
            pixel_inds = tf.squeeze(tf.where(not_ignore_pixel), 1)
            lab_gather = tf.to_int32(tf.gather(lab, pixel_inds))

            # Define softmax loss.
            for i, out in enumerate(outputs):
                # Get mini-batch size on each GPU device.
                n = out.get_shape().as_list()[0]

                # Flatten predictions.
                out = tf.reshape(out, [-1, args.num_classes])
                out_gather = tf.gather(out, pixel_inds)

                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=out_gather, labels=lab_gather)
                loss = tf.reduce_mean(loss)
                # Weight each device's mean by its share of the global batch.
                loss *= float(n) / float(args.batch_size)
                mean_losses.append(loss)

            # Define weight regularization loss.
            # NOTE: `w` rebinds the image width parsed above; the width is not
            # read again after this point.
            w = args.weight_decay
            l2_losses = [
                w * tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if 'weights' in v.name
            ]
            # Divided by num_gpu because this term is appended once per device.
            l2_loss = tf.add_n(l2_losses) / float(args.num_gpu)
            mean_l2_losses.append(l2_loss)

    # Sum all loss terms.
    mean_seg_loss = tf.add_n(mean_losses)
    mean_l2_loss = tf.add_n(mean_l2_losses)
    reduced_loss = mean_seg_loss + mean_l2_loss

    # Grab variable names which are used for training.
    all_trainable = tf.trainable_variables()
    fc_trainable = [v for v in all_trainable if 'block5' in v.name]  # lr*10
    base_trainable = [v for v in all_trainable if 'block5' not in v.name]  # lr*1

    # Computes gradients per iteration.
    grads = tf.gradients(reduced_loss, base_trainable + fc_trainable, colocate_gradients_with_ops=True)
    grads_base = grads[:len(base_trainable)]
    grads_fc = grads[len(base_trainable):]

    # Define optimisation parameters.
    # Polynomial decay: lr = base_lr * (1 - step / num_steps) ** power.
    base_lr = tf.constant(args.learning_rate)
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_base = tf.train.MomentumOptimizer(learning_rate * 1.0, args.momentum)
    opt_fc = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)

    # Define tensorflow operations which apply gradients to update variables.
    train_op_base = opt_base.apply_gradients(zip(grads_base, base_trainable))
    train_op_fc = opt_fc.apply_gradients(zip(grads_fc, fc_trainable))
    train_op = tf.group(train_op_base, train_op_fc)

    # Process for visualisation.
    with tf.device('/cpu:0'):
        # Image summary for input image, ground-truth label and prediction.
        cat_output = tf.concat([o[-1] for o in outputs_mgpu], axis=0)
        output_vis = tf.image.resize_nearest_neighbor(
            cat_output, tf.shape(image_batch)[1:3, ])
        output_vis = tf.argmax(output_vis, axis=3)
        output_vis = tf.expand_dims(output_vis, dim=3)
        output_vis = tf.cast(output_vis, dtype=tf.uint8)

        labels_vis = tf.cast(label_batch, dtype=tf.uint8)

        in_summary = tf.py_func(utils.general.inv_preprocess, [image_batch, IMG_MEAN], tf.uint8)
        gt_summary = tf.py_func(utils.general.decode_labels, [labels_vis, args.num_classes], tf.uint8)
        out_summary = tf.py_func(utils.general.decode_labels, [output_vis, args.num_classes], tf.uint8)

        # Concatenate image summaries in a row.
        total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2, values=[in_summary, gt_summary, out_summary]),
            max_outputs=args.batch_size)

        # Scalar summary for different loss terms.
        seg_loss_summary = tf.summary.scalar('seg_loss', mean_seg_loss)
        # Rebinds total_summary to the merge of every summary defined so far.
        total_summary = tf.summary.merge_all()

        summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph())

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    pbar = tqdm(range(args.num_steps))
    for step in pbar:
        start_time = time.time()
        feed_dict = {step_ph: step}

        step_loss = 0
        # train_op is run iter_size times per step; the loss is averaged.
        for it in range(args.iter_size):
            # Update summary periodically.
            if it == args.iter_size - 1 and step % args.update_tb_every == 0:
                sess_outs = [reduced_loss, total_summary, train_op]
                loss_value, summary, _ = sess.run(sess_outs, feed_dict=feed_dict)
                summary_writer.add_summary(summary, step)
            else:
                sess_outs = [reduced_loss, train_op]
                loss_value, _ = sess.run(sess_outs, feed_dict=feed_dict)

            step_loss += loss_value

        step_loss /= args.iter_size
        lr = sess.run(learning_rate, feed_dict=feed_dict)

        # Save trained model periodically.
        if step % args.save_pred_every == 0 and step > 0:
            save(saver, sess, args.snapshot_dir, step)

        # NOTE: duration is computed but not used below.
        duration = time.time() - start_time
        desc = 'loss = {:.3f}, lr = {:.6f}'.format(step_loss, lr)
        pbar.set_description(desc)

    coord.request_stop()
    coord.join(threads)
def concat(self, inputs, axis, name):
    """Concatenate ``inputs`` along ``axis`` with ``tf.concat``, naming the op ``name``."""
    merged = tf.concat(values=inputs, axis=axis, name=name)
    return merged
def inference(self):
    """Build the prediction graph, the loss and the training op.

    Four parts are assembled under their own variable scopes: a first-order
    (linear) term, a shared embedding, a stack of ``self.cin_layer`` calls
    and a plain fully connected network. Their outputs are concatenated and
    mapped to a single logit.

    Sets ``self.y_logits``, ``self.y_hat``, ``self.pred_label``,
    ``self.loss`` and ``self.train_op``.
    """
    with tf.variable_scope('first_order_part'):
        linear_weights = tf.get_variable(
            name='first_ord_w', shape=[self.feat_num, 1], dtype=tf.float32)
        looked_up = tf.nn.embedding_lookup(linear_weights, self.index)  # (batch, m, 1)
        weighted = tf.multiply(looked_up, tf.expand_dims(self.x, axis=2))
        first_order = tf.reduce_sum(weighted, axis=2)  # (batch, m)

    with tf.variable_scope('emb_part'):
        embedding_table = tf.get_variable(
            name='second_ord_v', shape=[self.feat_num, self.vec_dim],
            dtype=tf.float32)
        field_vectors = tf.nn.embedding_lookup(embedding_table, self.index)  # (batch, m, D)
        scaled = tf.multiply(tf.expand_dims(self.x, axis=2), field_vectors)  # (batch, m, D)
        scaled = tf.layers.dropout(
            scaled, rate=self.dropout_rate, training=self.is_train)  # (batch, m, D)
        node_num = self.field_num * self.vec_dim
        embed_x = tf.reshape(scaled, shape=[-1, node_num])  # (batch, node_num)

    with tf.variable_scope('cin_part'):
        x0_tensor = tf.reshape(
            embed_x, shape=[-1, self.field_num, self.vec_dim])  # (batch, m, D)
        cross_tensors = [x0_tensor]
        field_nums = [int(self.field_num)]
        for depth, width in enumerate(self.cin_layer_num):
            next_tensor = self.cin_layer(
                x0_tensor, cross_tensors[-1], field_nums[-1], width,
                'cin_layer_%d' % depth)
            cross_tensors.append(next_tensor)
            field_nums.append(width)
        # Sum-pool every tensor (the input included) over its last axis.
        pooled = [tf.reduce_sum(t, axis=2) for t in cross_tensors]
        cin = tf.concat(pooled, axis=1)
        cin_lens = np.sum(field_nums)

    with tf.variable_scope('dnn_part'):
        dnn = embed_x
        in_num = node_num
        for depth, out_num in enumerate(self.dnn_layers):
            kernel = tf.get_variable(
                name='w_%d' % depth, shape=[in_num, out_num], dtype=tf.float32)
            bias = tf.get_variable(
                name='b_%d' % depth, shape=[out_num], dtype=tf.float32)
            dnn = tf.matmul(dnn, kernel) + bias
            dnn = tf.layers.dropout(
                tf.nn.relu(dnn), rate=self.dropout_rate, training=self.is_train)
            in_num = out_num

    with tf.variable_scope('output_part'):
        output = tf.concat([first_order, cin, dnn], axis=1)
        # Row count is the summed width of the three concatenated parts.
        global_w = tf.get_variable(
            name='global_w', shape=[self.field_num + cin_lens + in_num, 1],
            dtype=tf.float32)
        global_b = tf.get_variable(name='global_b', shape=[1], dtype=tf.float32)
        self.y_logits = tf.matmul(output, global_w) + global_b

    self.y_hat = tf.nn.sigmoid(self.y_logits)
    self.pred_label = tf.cast(self.y_hat > 0.5, tf.int32)
    # Binary cross-entropy with a 1e-8 offset inside each log.
    positive_term = self.y * tf.log(self.y_hat + 1e-8)
    negative_term = (1 - self.y) * tf.log(1 - self.y_hat + 1e-8)
    self.loss = -tf.reduce_mean(positive_term + negative_term)
    self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
def calc_gradients(
        test_file,
        model_name,
        output_file_dir,
        max_iter,
        learning_rate=0.0001,
        targets=None,
        weight_loss2=1,
        data_spec=None,
        batch_size=1,
        seq_len=40):
    """Optimise an additive perturbation for each test video and save the frames.

    A single trainable ``modifier`` tensor is added to the frames selected by
    a hard-coded temporal mask, the classifier is evaluated on the result, and
    Adam minimises ``loss1 + weight_loss2 * loss2``, where ``loss1`` is the
    label loss and ``loss2`` a content loss on the perturbation. The
    resulting adversarial frames and the perturbation images are written
    under ``output_file_dir``.

    Args:
        test_file: test list passed to ``DataSet``.
        model_name: forwarded to ``models.get_model``.
        output_file_dir: directory that receives ``adversarial/`` and ``noise/``.
        max_iter: maximum number of optimisation steps per video.
        learning_rate: Adam learning rate.
        targets: when None the true-label probability is pushed down;
            otherwise the probability of the label taken from ``targets`` is
            pushed up.
        weight_loss2: weight of the content loss.
        data_spec: object providing ``crop_size``, ``channels``, ``mean`` and
            ``rescale``; required despite the ``None`` default.
        batch_size: number of videos per batch.
        seq_len: number of frames fed to the network; the hard-coded mask has
            40 entries, so values above 40 are not supported.
    """
    spec = data_spec

    modifier = tf.Variable(0.01*np.ones((1, seq_len, spec.crop_size, spec.crop_size, spec.channels), dtype=np.float32))
    input_image = tf.placeholder(tf.float32, (batch_size, seq_len, spec.crop_size, spec.crop_size, spec.channels))
    input_label = tf.placeholder(tf.int32, (batch_size))

    # temporal mask, 1 indicates the selected frame
    indicator = [0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0]

    def perturbed_frame(batch_idx, frame_idx):
        """Return one frame with the modifier added, clamped and rescaled by 1/255."""
        return tf.minimum(tf.maximum(modifier[0,frame_idx,:,:,:]+input_image[batch_idx,frame_idx,:,:,:]*255.0, -spec.mean+spec.rescale[0]), -spec.mean+spec.rescale[1])/255.0

    def build_video(batch_idx):
        """Assemble one video, perturbing frame 0 and every masked frame."""
        # Frame 0 is always perturbed, whatever indicator[0] says.
        video = tf.expand_dims(perturbed_frame(batch_idx, 0), 0)
        for ll in range(seq_len-1):
            if indicator[ll+1] == 1:
                mask_temp = perturbed_frame(batch_idx, ll+1)
            else:
                mask_temp = input_image[batch_idx,ll+1,:,:,:]
            mask_temp = tf.expand_dims(mask_temp, 0)
            video = tf.concat([video, mask_temp], 0)
        return tf.expand_dims(video, 0)

    true_image = build_video(0)
    for kk in range(batch_size-1):
        true_image = tf.concat([true_image, build_video(kk+1)], 0)

    # Content loss: sum over frames of the RMS difference to the clean input.
    loss2 = tf.reduce_sum(tf.sqrt(tf.reduce_mean(tf.square(true_image-input_image), axis=[0, 2, 3, 4])))
    norm_frame = tf.reduce_mean(tf.abs(modifier), axis=[2,3,4])

    sess = tf.Session()
    probs, variable_set, pre_label, ince_output, pre_node = models.get_model(sess, true_image, model_name, False)
    true_label_prob = tf.reduce_sum(probs*tf.one_hot(input_label,101),[1])
    if targets is None:
        loss1 = -tf.log(1 - true_label_prob + 1e-6)
    else:
        loss1 = -tf.log(true_label_prob + 1e-6)
    loss1 = tf.reduce_mean(loss1)
    loss = loss1 + weight_loss2 * loss2

    optimizer = tf.train.AdamOptimizer(learning_rate)
    print('optimizer.minimize....')
    # Only the perturbation is trained; the model weights stay fixed.
    train = optimizer.minimize(loss, var_list=[modifier])

    # initialize all uninitialized variables
    init_varibale_list = set(tf.all_variables()) - variable_set
    sess.run(tf.initialize_variables(init_varibale_list))

    data = DataSet(test_list=test_file, seq_length=seq_len, image_shape=(spec.crop_size, spec.crop_size, spec.channels))
    all_names = []
    all_images = []
    all_labels = []
    def_len = 40
    for video in data.test_data:
        frames = data.get_frames_for_sample(video)
        # Videos shorter than def_len frames are skipped.
        if len(frames) < def_len:
            continue
        frames = data.rescale_list(frames, def_len)
        frames_data = data.build_image_sequence(frames)
        all_images.append(frames_data)
        label, hot_labels = data.get_class_one_hot(video[1])
        all_labels.append(label)
        all_names.append(frames)

    total = len(all_names)
    all_indices = range(total)
    # Floor division: range() below needs an int on Python 3 as well.
    num_batch = total // batch_size
    print('process data length:', num_batch)

    correct_ori = 0
    correct_noi = 0
    tot_image = 0

    for ii in range(num_batch):
        images = all_images[ii*batch_size : (ii+1)*batch_size]
        names = all_names[ii*batch_size : (ii+1)*batch_size]
        labels = all_labels[ii*batch_size : (ii+1)*batch_size]
        indices = all_indices[ii*batch_size : (ii+1)*batch_size]
        print('------------------prediction for clean video-------------------')
        print('---video-level prediction---')
        for xx in range(len(indices)):
            print(names[xx][0],'label:', labels[xx], 'indice:',indices[xx], 'size:', len(images[xx]), len(images[xx][0]), len(images[xx][0][0]), len(images[xx][0][0][0]))

        # Reset the perturbation (and optimizer state) for every batch.
        sess.run(tf.initialize_variables(init_varibale_list))
        if targets is not None:
            labels = [targets[e] for e in names]

        # NOTE(review): only the first video of the batch is fed -- confirm
        # this is intended when batch_size > 1.
        feed_dict = {input_image: [images[0][0:seq_len]], input_label: labels}
        var_loss, true_prob, var_loss1, var_loss2, var_pre, var_node = sess.run((loss, true_label_prob, loss1, loss2, pre_label, pre_node), feed_dict=feed_dict)

        correct_pre = correct_ori
        for xx in range(len(indices)):
            if labels[xx] == var_pre[xx]:
                correct_ori += 1
            tot_image += 1

        print('Start!')
        min_loss = var_loss
        last_min = -1
        print('---frame-wise prediction---')
        print('node_label:', var_node, 'label loss:', var_loss1, 'content loss:', var_loss2, 'prediction:', var_pre, 'probib', true_prob)
        # record number of iterations
        tot_iter = 0

        # Skip batches where no clean prediction matched its label.
        if correct_pre == correct_ori:
            continue

        print('------------------prediction for adversarial video-------------------')

        for cur_iter in range(max_iter):
            tot_iter += 1
            sess.run(train, feed_dict=feed_dict)
            var_loss, true_prob, var_loss1, var_loss2, var_pre, var_node = sess.run((loss, true_label_prob, loss1, loss2, pre_label, pre_node), feed_dict=feed_dict)
            print('iter:', cur_iter, 'total loss:', var_loss, 'label loss:', var_loss1, 'content loss:', var_loss2, 'prediction:', var_pre, 'probib:', true_prob)
            break_condition = False
            if var_loss < min_loss:
                # Stop once the improvement over the best loss is negligible.
                if np.absolute(var_loss-min_loss) < 0.00001:
                    break_condition = True
                    print(last_min)
                min_loss = var_loss
                last_min = cur_iter

            if cur_iter + 1 == max_iter or break_condition:
                print('iter:', cur_iter, 'node_label:', var_node, 'label loss:', var_loss1, 'content loss:', var_loss2, 'prediction:', var_pre, 'probib:', true_prob)
                var_diff, var_probs, noise_norm = sess.run((modifier, probs, norm_frame), feed_dict=feed_dict)
                for pp in range(seq_len):
                    # print the map value for each frame
                    print(noise_norm[0][pp])
                for i in range(len(indices)):
                    top1 = var_probs[i].argmax()
                    if labels[i] == top1:
                        correct_noi += 1
                break
        print('saved modifier paramters.', ii)

        adv_dir = output_file_dir+'/adversarial/'
        dif_dir = output_file_dir+'/noise/'
        # Create each output root independently so that an existing
        # 'adversarial/' does not prevent 'noise/' from being created.
        for root_dir in (adv_dir, dif_dir):
            if not os.path.exists(root_dir):
                os.mkdir(root_dir)

        for ll in range(len(indices)):
            for kk in range(def_len):
                if kk < seq_len:
                    attack_img = np.clip(images[ll][kk]*255.0+var_diff[0][kk]+data_spec.mean,data_spec.rescale[0],data_spec.rescale[1])
                    diff = np.clip(np.absolute(var_diff[0][kk])*255.0, data_spec.rescale[0],data_spec.rescale[1])
                else:
                    # Frames beyond seq_len were never perturbed.
                    attack_img = np.clip(images[ll][kk]*255.0+data_spec.mean,data_spec.rescale[0],data_spec.rescale[1])
                    diff = np.zeros((spec.crop_size,spec.crop_size,spec.channels))
                im_diff = scipy.misc.toimage(arr=diff, cmin=data_spec.rescale[0], cmax=data_spec.rescale[1])
                im = scipy.misc.toimage(arr=attack_img, cmin=data_spec.rescale[0], cmax=data_spec.rescale[1])
                new_name = names[ll][kk].split('/')

                tmp_dir = adv_dir+new_name[-2]
                tmp1_dir = dif_dir+new_name[-2]
                for video_dir in (tmp_dir, tmp1_dir):
                    if not os.path.exists(video_dir):
                        os.mkdir(video_dir)

                new_name = new_name[-1] + '.png'
                im.save(tmp_dir + '/' + new_name)
                im_diff.save(tmp1_dir + '/' + new_name)
        print('saved adversarial frames.', ii)
        print('correct_ori:', correct_ori, 'correct_noi:', correct_noi)
def add_logits_op_conv(self):
    """Defines self.logits

    For each word in each sentence of the batch, it corresponds to a vector
    of scores, of dimension equal to the number of tags.

    One conv2d + max-pool branch is built per entry of
    ``self.config.FILTER_SIZE`` over ``self.image_patches_reshaped``; the
    branch outputs are concatenated on axis 2 into ``self.h_pool`` and fed
    through two fully connected layers (scopes "input1" and "input2").

    Side effects: sets ``self.output`` (the last branch's reshaped output),
    ``self.h_pool`` and ``self.logits``.
    """
    with tf.name_scope("conv-maxpool"):
        # ( (BATCH_SIZE*WORDS), WINDOW_LEN, DIM, 1 )
        pooled_out = []
        for i, filter_size in enumerate(self.config.FILTER_SIZE):
            filter_shape = [
                filter_size, self.config.DIM, 1,
                self.config.NUMBER_OF_FEATURE_MAPS[i]
            ]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            # NOTE: `b` is created but never applied; the bias-add and ReLU
            # that would use it are disabled in this version.
            b = tf.Variable(tf.constant(
                0.1, shape=[self.config.NUMBER_OF_FEATURE_MAPS[i]]), name="b")
            conv = tf.nn.conv2d(self.image_patches_reshaped, filter=W, strides=[1, 1, 1, 1], padding="VALID", name="conv")
            # ( (BATCH_SIZE*WORDS), WINDOW_LEN-FILTER_SIZE + 1, 1, NUMBER_OF_FEATURE_MAPS)
            # The pooling window spans the whole convolution output height.
            pooled = tf.nn.max_pool(conv, ksize=[
                1, (self.config.WINDOW_LEN - filter_size + 1), 1, 1
            ], strides=[1, 1, 1, 1], padding='VALID', data_format='NHWC', name="pool")
            pooled = tf.squeeze(
                pooled)  # ( (BATCH_SIZE*WORDS), NUMBER_OF_FEATURE_MAPS)
            self.output = tf.reshape(pooled, (-1, tf.shape(
                self.word_ids)[1], self.config.NUMBER_OF_FEATURE_MAPS[i]))
            pooled_out.append(self.output)
        self.h_pool = tf.concat(pooled_out, 2)

    with tf.name_scope("size_calc"):
        # Total feature-map count across all filter sizes.
        size = 0
        for i in range(len(self.config.FILTER_SIZE)):
            size += self.config.NUMBER_OF_FEATURE_MAPS[i]

    with tf.name_scope("conv2-maxpool"):
        filter_shape = [
            self.config.conv2_filter_size, size, self.config.conv2_dim
        ]
        W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W_")
        b = tf.Variable(tf.constant(0.1, shape=[self.config.conv2_dim]), name="b_")
        # NOTE: `conv_` (and this scope's `b`) are not read again in this
        # method; the projection below consumes self.h_pool directly.
        conv_ = tf.nn.conv1d(self.h_pool, filters=W, stride=1, padding="SAME", name="conv_")

    with tf.variable_scope("proj"):
        dense_input = tf.reshape(self.h_pool, (-1, size))
        output = tf.contrib.layers.fully_connected(
            dense_input,
            self.config.mlp_size,
            activation_fn=tf.nn.relu,
            normalizer_fn=None,
            normalizer_params=None,
            weights_initializer=tf.contrib.layers.xavier_initializer(
                uniform=True, seed=1227),
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.001),
            biases_initializer=tf.zeros_initializer(),
            trainable=True,
            scope="input1")
        # NOTE: an alternative branch (scope "input3") that passed the raw
        # image patches through a second dense layer and concatenated the
        # result with `output` is disabled in this version.
        output = tf.nn.dropout(output, self.dropout_conv)
        output = tf.contrib.layers.fully_connected(
            output,
            self.config.ntags,
            activation_fn=None,
            normalizer_fn=None,
            normalizer_params=None,
            weights_initializer=tf.contrib.layers.xavier_initializer(
                uniform=True, seed=1227),
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.001),
            biases_initializer=tf.zeros_initializer(),
            trainable=True,
            scope="input2")
        # Restore the (batch, words, ntags) layout.
        self.logits = tf.reshape(
            output, (-1, tf.shape(self.word_ids)[1], self.config.ntags))
def _build_sampler(self):
    """Build the sampler ops and the log_prob ops.

    For every layer the controller LSTM is stepped to sample either a single
    branch id (``self.search_whole_channels``) or a (start, count) pair per
    branch, followed, for every layer but the first, by skip-connection
    decisions taken with attention over the previous layers' anchors.

    Sets ``self.sample_arc``, ``self.sample_entropy``,
    ``self.sample_log_prob``, ``self.skip_count`` and ``self.skip_penaltys``.

    Raises:
        ValueError: if ``self.search_for`` is not "macro", "branch" or
            "connection" while ``self.search_whole_channels`` is set.
    """
    # Single-argument print calls behave identically on Python 2 and 3.
    print("-" * 80)
    print("Build controller sampler")
    anchors = []
    anchors_w_1 = []

    arc_seq = []
    entropys = []
    log_probs = []
    skip_count = []
    skip_penaltys = []

    prev_c = [
        tf.zeros([1, self.lstm_size], tf.float32)
        for _ in range(self.lstm_num_layers)
    ]
    prev_h = [
        tf.zeros([1, self.lstm_size], tf.float32)
        for _ in range(self.lstm_num_layers)
    ]
    inputs = self.g_emb
    skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
                               dtype=tf.float32)
    for layer_id in range(self.num_layers):
        if self.search_whole_channels:
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logit = tf.matmul(next_h[-1], self.w_soft)
            if self.temperature is not None:
                logit /= self.temperature
            if self.tanh_constant is not None:
                logit = self.tanh_constant * tf.tanh(logit)
            if self.search_for == "macro" or self.search_for == "branch":
                branch_id = tf.multinomial(logit, 1)
                branch_id = tf.to_int32(branch_id)
                branch_id = tf.reshape(branch_id, [1])
            elif self.search_for == "connection":
                branch_id = tf.constant([0], dtype=tf.int32)
            else:
                raise ValueError("Unknown search_for {}".format(
                    self.search_for))
            arc_seq.append(branch_id)
            # `log_prob` holds the cross-entropy, i.e. the negative
            # log-probability of the sampled label.
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logit, labels=branch_id)
            log_probs.append(log_prob)
            entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
            entropys.append(entropy)
            inputs = tf.nn.embedding_lookup(self.w_emb, branch_id)
        else:
            for branch_id in range(self.num_branches):
                # Sample the start index for this branch.
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logit = tf.matmul(next_h[-1], self.w_soft["start"][branch_id])
                if self.temperature is not None:
                    logit /= self.temperature
                if self.tanh_constant is not None:
                    logit = self.tanh_constant * tf.tanh(logit)
                start = tf.multinomial(logit, 1)
                start = tf.to_int32(start)
                start = tf.reshape(start, [1])
                arc_seq.append(start)
                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=start)
                log_probs.append(log_prob)
                entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
                entropys.append(entropy)
                inputs = tf.nn.embedding_lookup(
                    self.w_emb["start"][branch_id], start)

                # Sample the count, masking out values that would run past
                # out_filters given the sampled start.
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logit = tf.matmul(next_h[-1], self.w_soft["count"][branch_id])
                if self.temperature is not None:
                    logit /= self.temperature
                if self.tanh_constant is not None:
                    logit = self.tanh_constant * tf.tanh(logit)
                mask = tf.range(0, limit=self.out_filters - 1, delta=1,
                                dtype=tf.int32)
                mask = tf.reshape(mask, [1, self.out_filters - 1])
                mask = tf.less_equal(mask, self.out_filters - 1 - start)
                logit = tf.where(mask, x=logit,
                                 y=tf.fill(tf.shape(logit), -np.inf))
                count = tf.multinomial(logit, 1)
                count = tf.to_int32(count)
                count = tf.reshape(count, [1])
                # The stored architecture uses a 1-based count.
                arc_seq.append(count + 1)
                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=count)
                log_probs.append(log_prob)
                entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
                entropys.append(entropy)
                inputs = tf.nn.embedding_lookup(
                    self.w_emb["count"][branch_id], count)

        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h

        if layer_id > 0:
            # Attention over all previous anchors gives one skip / no-skip
            # logit pair per earlier layer.
            query = tf.concat(anchors_w_1, axis=0)
            query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
            query = tf.matmul(query, self.v_attn)
            logit = tf.concat([-query, query], axis=1)
            if self.temperature is not None:
                logit /= self.temperature
            if self.tanh_constant is not None:
                logit = self.tanh_constant * tf.tanh(logit)

            skip = tf.multinomial(logit, 1)
            skip = tf.to_int32(skip)
            skip = tf.reshape(skip, [layer_id])
            arc_seq.append(skip)

            # KL-style penalty between the skip probabilities and skip_targets.
            skip_prob = tf.sigmoid(logit)
            kl = skip_prob * tf.log(skip_prob / skip_targets)
            kl = tf.reduce_sum(kl)
            skip_penaltys.append(kl)

            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logit, labels=skip)
            log_probs.append(tf.reduce_sum(log_prob, keep_dims=True))

            entropy = tf.stop_gradient(
                tf.reduce_sum(log_prob * tf.exp(-log_prob), keep_dims=True))
            entropys.append(entropy)

            skip = tf.to_float(skip)
            skip = tf.reshape(skip, [1, layer_id])
            skip_count.append(tf.reduce_sum(skip))
            # Next input: sum of the selected anchors divided by
            # (1 + number of selected skips).
            inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
            inputs /= (1.0 + tf.reduce_sum(skip))
        else:
            inputs = self.g_emb

        anchors.append(next_h[-1])
        anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))

    arc_seq = tf.concat(arc_seq, axis=0)
    self.sample_arc = tf.reshape(arc_seq, [-1])

    entropys = tf.stack(entropys)
    self.sample_entropy = tf.reduce_sum(entropys)

    log_probs = tf.stack(log_probs)
    self.sample_log_prob = tf.reduce_sum(log_probs)

    skip_count = tf.stack(skip_count)
    self.skip_count = tf.reduce_sum(skip_count)

    skip_penaltys = tf.stack(skip_penaltys)
    self.skip_penaltys = tf.reduce_mean(skip_penaltys)
def combine_children(left_tensor, right_tensor):
    """Combine two child tensors into a parent representation.

    The children are concatenated on axis 1, multiplied by ``W1``, offset by
    ``b1`` (both taken from the enclosing scope) and passed through ReLU.

    Args:
        left_tensor: left child tensor.
        right_tensor: right child tensor.

    Returns:
        ``relu(concat([left, right], axis=1) @ W1 + b1)``.
    """
    # Keyword arguments match the TF >= 1.0 signature tf.concat(values, axis)
    # and the other tf.concat wrappers in this file.
    children = tf.concat(axis=1, values=[left_tensor, right_tensor])
    return tf.nn.relu(tf.matmul(children, W1) + b1)
def multi_modal_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example of a network in tf that has both state and image inputs.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters. The keys
            read here are 'obs_include', 'sensor_dims', 'obs_image_data',
            'num_filters', 'image_height', 'image_width' and 'image_channels'.
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 2
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, offset = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        sensor_range = range(offset, offset + dim)
        if sensor in network_config['obs_image_data']:
            img_idx.extend(sensor_range)
        else:
            x_idx.extend(sensor_range)
        offset += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    # Only the last index of each list is used: the state columns are taken
    # as [0, x_idx[-1]] and the image columns as everything after them up to
    # img_idx[-1].
    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, im_width, im_height, num_channels])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width / (2.0 * pool_size) * im_height / (2.0 * pool_size) * num_filters[1])

    # Store layers weight & bias
    weights = {
        # filter_size x filter_size conv, num_channels inputs -> num_filters[0] outputs
        'wc1': get_xavier_weights([filter_size, filter_size, num_channels, num_filters[0]],
                                  (pool_size, pool_size)),
        # filter_size x filter_size conv, num_filters[0] inputs -> num_filters[1] outputs
        'wc2': get_xavier_weights([filter_size, filter_size, num_filters[0], num_filters[1]],
                                  (pool_size, pool_size)),
    }

    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])
    conv_layer_0 = max_pool(conv_layer_0, k=pool_size)

    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_1 = max_pool(conv_layer_1, k=pool_size)

    # Flatten the conv features and append the state vector before the MLP.
    conv_out_flat = tf.reshape(conv_layer_1, [-1, conv_out_size])
    fc_input = tf.concat(axis=1, values=[conv_out_flat, state_input])

    fc_output, _, _ = get_mlp_layers(fc_input, n_layers, dim_hidden)

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
def add_word_embeddings_op(self):
    """Build ``self.word_embeddings`` and, optionally, sliding-window patches.

    Word vectors come from ``self.config.embeddings`` when it is provided
    (trainable according to ``self.config.train_embeddings``); otherwise a
    fresh ``[nwords, dim_word]`` variable is created. When
    ``self.config.use_chars`` is set, the final hidden states of a
    bidirectional LSTM over character embeddings are concatenated onto each
    word vector. Dropout with ``self.dropout`` is applied to the result.

    When ``self.config.conv`` is set, ``self.temp``, ``self.image_patches``
    and ``self.image_patches_reshaped`` are additionally populated with
    patches extracted from the dropped-out embeddings.
    """
    cfg = self.config

    with tf.variable_scope("words"):
        if cfg.embeddings is not None:
            # Pre-trained vectors supplied by the configuration.
            word_table = tf.Variable(
                cfg.embeddings,
                name="_word_embeddings",
                dtype=tf.float32,
                trainable=cfg.train_embeddings)
        else:
            self.logger.info("WARNING: randomly initializing word vectors")
            word_table = tf.get_variable(
                name="_word_embeddings",
                dtype=tf.float32,
                shape=[cfg.nwords, cfg.dim_word])

        embedded = tf.nn.embedding_lookup(
            word_table, self.word_ids, name="word_embeddings")

    with tf.variable_scope("chars"):
        if cfg.use_chars:
            # Character embedding matrix and lookup.
            char_table = tf.get_variable(
                name="_char_embeddings",
                dtype=tf.float32,
                shape=[cfg.nchars, cfg.dim_char])
            char_vectors = tf.nn.embedding_lookup(
                char_table, self.char_ids, name="char_embeddings")

            # Merge the two leading axes so that each word becomes one
            # sequence of characters (time on axis 1).
            dims = tf.shape(char_vectors)
            char_vectors = tf.reshape(
                char_vectors,
                shape=[dims[0] * dims[1], dims[-2], cfg.dim_char])
            flat_lengths = tf.reshape(
                self.word_lengths, shape=[dims[0] * dims[1]])

            # Bidirectional LSTM over the characters of every word.
            forward_cell = tf.contrib.rnn.LSTMCell(
                cfg.hidden_size_char, state_is_tuple=True)
            backward_cell = tf.contrib.rnn.LSTMCell(
                cfg.hidden_size_char, state_is_tuple=True)
            _, final_states = tf.nn.bidirectional_dynamic_rnn(
                forward_cell,
                backward_cell,
                char_vectors,
                sequence_length=flat_lengths,
                dtype=tf.float32)

            # Keep only the second element of each direction's final state
            # and join the two directions.
            (_, hidden_fw), (_, hidden_bw) = final_states
            char_repr = tf.reshape(
                tf.concat([hidden_fw, hidden_bw], axis=-1),
                shape=[dims[0], dims[1], 2 * cfg.hidden_size_char])

            embedded = tf.concat([embedded, char_repr], axis=-1)

    self.word_embeddings = tf.nn.dropout(embedded, self.dropout)

    if not cfg.conv:
        return

    # Slide a WINDOW_LEN x DIM window along axis 1 of the embeddings.
    window_len, dim = cfg.WINDOW_LEN, cfg.DIM
    patches = tf.extract_image_patches(
        self.word_embeddings[:, :, :, tf.newaxis],
        ksizes=[1, window_len, dim, 1],
        strides=[1, cfg.stride, dim, 1],
        rates=[1, 1, 1, 1],
        padding='SAME')
    self.temp = tf.squeeze(patches)
    self.image_patches = tf.reshape(
        self.temp,
        (-1, tf.shape(self.word_ids)[1], window_len, dim))
    flat_patches = tf.reshape(self.image_patches, (-1, window_len, dim))
    self.image_patches_reshaped = flat_patches[:, :, :, tf.newaxis]
def build_model(self, dataset):
    """Build the TensorFlow graph for the model.

    Creates the input placeholders, the embedding variables ``self.w`` and
    ``self.v``, the linear and pairwise terms, and the task-specific head,
    loss and metrics. The branch taken depends on ``self.task``
    ("rating" or "ranking"); any other value leaves the head undefined.

    Args:
        dataset: Object exposing ``train_feat_indices``, ``feature_size``,
            ``n_users``, ``n_items``, ``global_mean`` and
            ``lower_upper_bound`` (either ``None`` or an indexable
            ``(lower, upper)`` pair).
    """
    tf.set_random_seed(self.seed)
    self.dataset = dataset
    self.field_size = dataset.train_feat_indices.shape[1]
    self.feature_size = dataset.feature_size
    self.n_users = dataset.n_users
    self.n_items = dataset.n_items
    self.global_mean = dataset.global_mean
    self.total_items_unique = self.item_info
    if dataset.lower_upper_bound is not None:
        self.lower_bound = dataset.lower_upper_bound[0]
        self.upper_bound = dataset.lower_upper_bound[1]
    else:
        self.lower_bound = None
        self.upper_bound = None

    self.feature_indices = tf.placeholder(tf.int32, shape=[None, self.field_size], name="indices")
    self.feature_values = tf.placeholder(tf.float32, shape=[None, self.field_size], name="values")
    self.labels = tf.placeholder(tf.float32, shape=[None])

    # Both tables have feature_size + 1 rows.
    self.w = tf.Variable(tf.truncated_normal([self.feature_size + 1, 1], 0.0, 0.01))
    self.v = tf.Variable(tf.truncated_normal([self.feature_size + 1, self.n_factors], 0.0, 0.01))
    self.feature_values_reshape = tf.reshape(self.feature_values, shape=[-1, self.field_size, 1])

    self.linear_embedding = tf.nn.embedding_lookup(self.w, self.feature_indices)  # N * F * 1
    self.linear_term = tf.reduce_sum(tf.multiply(self.linear_embedding, self.feature_values_reshape), 2)

    self.feature_embedding = tf.nn.embedding_lookup(self.v, self.feature_indices)  # N * F * K
    self.feature_embedding = tf.multiply(self.feature_embedding, self.feature_values_reshape)

    # NOTE(review): both reductions run over axis=2 (the factor axis), giving
    # an N * F result. The textbook FM pairwise term sums over the field
    # axis (axis=1). Left unchanged because it alters the model; confirm.
    self.pairwise_term = 0.5 * tf.subtract(
        tf.square(tf.reduce_sum(self.feature_embedding, axis=2)),
        tf.reduce_sum(tf.square(self.feature_embedding), axis=2))

    self.concat = tf.concat([self.linear_term, self.pairwise_term], axis=1)

    if self.task == "rating":
        self.pred = tf.layers.dense(inputs=self.concat, units=1, name="pred")
        self.loss = tf.losses.mean_squared_error(labels=tf.reshape(self.labels, [-1, 1]),
                                                 predictions=self.pred)

        if self.lower_bound is not None and self.upper_bound is not None:
            # Fixed: the keyword was misspelled `abels`, which raised a
            # TypeError as soon as rating bounds were available.
            self.rmse = tf.sqrt(
                tf.losses.mean_squared_error(
                    labels=tf.reshape(self.labels, [-1, 1]),
                    predictions=tf.clip_by_value(self.pred, self.lower_bound, self.upper_bound)))
        else:
            # NOTE(review): without bounds this is the MSE loss, not its
            # square root — confirm whether callers expect tf.sqrt here.
            self.rmse = self.loss

        # Only the factor table `v` is regularised; `w` is not.
        reg_v = self.reg * tf.nn.l2_loss(self.v)
        self.total_loss = tf.add_n([self.loss, reg_v])

    elif self.task == "ranking":
        self.logits = tf.layers.dense(inputs=self.concat, units=1, name="logits")
        self.logits = tf.reshape(self.logits, [-1])
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.logits))

        self.y_prob = tf.sigmoid(self.logits, name="prob")
        # Threshold the probability at 0.5 to obtain a hard 0/1 prediction.
        self.pred = tf.where(self.y_prob >= 0.5,
                             tf.fill(tf.shape(self.logits), 1.0),
                             tf.fill(tf.shape(self.logits), 0.0), name="pred")
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.labels), tf.float32))
        self.precision = precision_tf(self.pred, self.labels)

        # Only the factor table `v` is regularised; `w` is not.
        reg_v = self.reg * tf.nn.l2_loss(self.v)
        self.total_loss = tf.add_n([self.loss, reg_v])
def main():
    """Build the input pipelines and model graph, then run the selected mode.

    All configuration is read from ``FLAGS``. The function builds queue-based
    readers for the training and validation files (TFRecord or CSV), the
    model selected by ``FLAGS.model``, the training op, accuracy/AUC ops and
    an inference signature. It then opens a session and runs one of the
    modes in ``FLAGS.mode``:

    * ``train``: train until the input queues are exhausted, periodically
      logging metrics and saving checkpoints, then export the model.
    * ``export``: restore the latest checkpoint and export the model.
    * ``savedmodel``: restore the latest checkpoint and write a SavedModel.
    * ``inference``: restore the latest checkpoint, run predictions on a CSV
      test file and report accuracy and AUC.

    The process exits with status 1 on an unknown input format, an unknown
    model, or a missing checkpoint in a mode that requires one.
    """
    # Get hyperparameters
    if FLAGS.enable_colored_log:
        import coloredlogs
        coloredlogs.install()
    logging.basicConfig(level=logging.INFO)
    INPUT_FILE_FORMAT = FLAGS.input_file_format
    if INPUT_FILE_FORMAT not in ["tfrecord", "csv"]:
        logging.error("Unknow input file format: {}".format(INPUT_FILE_FORMAT))
        exit(1)
    FEATURE_SIZE = FLAGS.feature_size
    LABEL_SIZE = FLAGS.label_size
    EPOCH_NUMBER = FLAGS.epoch_number
    if EPOCH_NUMBER <= 0:
        # A non-positive epoch count is passed on as None.
        EPOCH_NUMBER = None
    BATCH_THREAD_NUMBER = FLAGS.batch_thread_number
    MIN_AFTER_DEQUEUE = FLAGS.min_after_dequeue
    BATCH_CAPACITY = BATCH_THREAD_NUMBER * FLAGS.batch_size + MIN_AFTER_DEQUEUE
    MODE = FLAGS.mode
    MODEL = FLAGS.model
    CHECKPOINT_PATH = FLAGS.checkpoint_path
    # Paths starting with "fds://" are never created locally.
    if not CHECKPOINT_PATH.startswith("fds://") and not os.path.exists(
            CHECKPOINT_PATH):
        os.makedirs(CHECKPOINT_PATH)
    CHECKPOINT_FILE = CHECKPOINT_PATH + "/checkpoint.ckpt"
    LATEST_CHECKPOINT = tf.train.latest_checkpoint(CHECKPOINT_PATH)
    OUTPUT_PATH = FLAGS.output_path
    if not OUTPUT_PATH.startswith("fds://") and not os.path.exists(OUTPUT_PATH):
        os.makedirs(OUTPUT_PATH)
    pprint.PrettyPrinter().pprint(FLAGS.__flags)

    # Process TFRecords files
    def read_and_decode_tfrecord(filename_queue):
        """Read one TFRecord example and return its (label, features)."""
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(
            serialized_example,
            features={
                "label": tf.FixedLenFeature([], tf.float32),
                "features": tf.FixedLenFeature([FEATURE_SIZE], tf.float32),
            })
        label = features["label"]
        features = features["features"]
        return label, features

    def read_and_decode_csv(filename_queue):
        """Read one CSV line (label + 4 float columns) as (label, features)."""
        # TODO: Not generic for all datasets
        reader = tf.TextLineReader()
        key, value = reader.read(filename_queue)

        # Default values, in case of empty columns. Also specifies the type of the
        # decoded result.
        record_defaults = [[1], [1.0], [1.0], [1.0], [1.0]]
        col1, col2, col3, col4, col5 = tf.decode_csv(
            value, record_defaults=record_defaults)
        label = col1
        # Fixed: col4 used to be stacked twice and col5 was silently dropped.
        features = tf.stack([col2, col3, col4, col5])
        return label, features

    # Read TFRecords files for training
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once(FLAGS.train_file),
        num_epochs=EPOCH_NUMBER)
    if INPUT_FILE_FORMAT == "tfrecord":
        label, features = read_and_decode_tfrecord(filename_queue)
    elif INPUT_FILE_FORMAT == "csv":
        label, features = read_and_decode_csv(filename_queue)
    batch_labels, batch_features = tf.train.shuffle_batch(
        [label, features],
        batch_size=FLAGS.batch_size,
        num_threads=BATCH_THREAD_NUMBER,
        capacity=BATCH_CAPACITY,
        min_after_dequeue=MIN_AFTER_DEQUEUE)

    # Read TFRecords file for validation
    validate_filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once(FLAGS.validate_file),
        num_epochs=EPOCH_NUMBER)
    if INPUT_FILE_FORMAT == "tfrecord":
        validate_label, validate_features = read_and_decode_tfrecord(
            validate_filename_queue)
    elif INPUT_FILE_FORMAT == "csv":
        validate_label, validate_features = read_and_decode_csv(
            validate_filename_queue)
    validate_batch_labels, validate_batch_features = tf.train.shuffle_batch(
        [validate_label, validate_features],
        batch_size=FLAGS.validate_batch_size,
        num_threads=BATCH_THREAD_NUMBER,
        capacity=BATCH_CAPACITY,
        min_after_dequeue=MIN_AFTER_DEQUEUE)

    # Define the model
    input_units = FEATURE_SIZE
    output_units = LABEL_SIZE
    model_network_hidden_units = [int(i) for i in FLAGS.model_network.split()]

    def full_connect(inputs, weights_shape, biases_shape, is_train=True):
        """Affine layer; batch-normalised when FLAGS.enable_bn and is_train."""
        weights = tf.get_variable(
            "weights", weights_shape, initializer=tf.random_normal_initializer())
        biases = tf.get_variable(
            "biases", biases_shape, initializer=tf.random_normal_initializer())
        layer = tf.matmul(inputs, weights) + biases

        if FLAGS.enable_bn and is_train:
            # Normalise with the moments of the current batch.
            mean, var = tf.nn.moments(layer, axes=[0])
            scale = tf.get_variable(
                "scale", biases_shape, initializer=tf.random_normal_initializer())
            shift = tf.get_variable(
                "shift", biases_shape, initializer=tf.random_normal_initializer())
            layer = tf.nn.batch_normalization(layer, mean, var, shift, scale,
                                              FLAGS.bn_epsilon)
        return layer

    def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True):
        """`full_connect` followed by a ReLU."""
        layer = full_connect(inputs, weights_shape, biases_shape, is_train)
        layer = tf.nn.relu(layer)
        return layer

    def customized_inference(inputs, is_train=True):
        """Fixed 128-32-8 MLP with optional dropout before the output layer."""
        hidden1_units = 128
        hidden2_units = 32
        hidden3_units = 8
        with tf.variable_scope("input"):
            layer = full_connect_relu(inputs, [input_units, hidden1_units],
                                      [hidden1_units], is_train)
        with tf.variable_scope("layer0"):
            layer = full_connect_relu(layer, [hidden1_units, hidden2_units],
                                      [hidden2_units], is_train)
        with tf.variable_scope("layer1"):
            layer = full_connect_relu(layer, [hidden2_units, hidden3_units],
                                      [hidden3_units], is_train)
        if FLAGS.enable_dropout and is_train:
            layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob)
        with tf.variable_scope("output"):
            layer = full_connect(layer, [hidden3_units, output_units],
                                 [output_units], is_train)
        return layer

    def dnn_inference(inputs, is_train=True):
        """MLP whose hidden sizes come from FLAGS.model_network."""
        with tf.variable_scope("input"):
            layer = full_connect_relu(inputs,
                                      [input_units, model_network_hidden_units[0]],
                                      [model_network_hidden_units[0]], is_train)

        for i in range(len(model_network_hidden_units) - 1):
            with tf.variable_scope("layer{}".format(i)):
                layer = full_connect_relu(layer, [
                    model_network_hidden_units[i],
                    model_network_hidden_units[i + 1]
                ], [model_network_hidden_units[i + 1]], is_train)

        with tf.variable_scope("output"):
            layer = full_connect(layer,
                                 [model_network_hidden_units[-1], output_units],
                                 [output_units], is_train)
        return layer

    def lr_inference(inputs, is_train=True):
        """Single affine layer from the inputs to the outputs."""
        # NOTE(review): is_train is not forwarded, so full_connect always
        # runs with its default is_train=True here — confirm this is intended.
        with tf.variable_scope("lr"):
            layer = full_connect(inputs, [input_units, output_units],
                                 [output_units])
        return layer

    def wide_and_deep_inference(inputs, is_train=True):
        """Sum of the `lr_inference` and `dnn_inference` outputs."""
        return lr_inference(inputs, is_train) + dnn_inference(inputs, is_train)

    def cnn_inference(inputs, is_train=True):
        """Three conv/ReLU/4x4-max-pool stages followed by a linear layer."""
        # TODO: Change if validate_batch_size is different
        # [BATCH_SIZE, 512 * 512 * 1] -> [BATCH_SIZE, 512, 512, 1]
        inputs = tf.reshape(inputs, [FLAGS.batch_size, 512, 512, 1])

        # [BATCH_SIZE, 512, 512, 1] -> [BATCH_SIZE, 128, 128, 8]
        with tf.variable_scope("conv0"):
            weights = tf.get_variable(
                "weights", [3, 3, 1, 8],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [8], initializer=tf.random_normal_initializer())

            layer = tf.nn.conv2d(
                inputs, weights, strides=[1, 1, 1, 1], padding="SAME")
            layer = tf.nn.bias_add(layer, bias)
            layer = tf.nn.relu(layer)
            layer = tf.nn.max_pool(
                layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME")

        # [BATCH_SIZE, 128, 128, 8] -> [BATCH_SIZE, 32, 32, 8]
        with tf.variable_scope("conv1"):
            weights = tf.get_variable(
                "weights", [3, 3, 8, 8],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [8], initializer=tf.random_normal_initializer())

            layer = tf.nn.conv2d(
                layer, weights, strides=[1, 1, 1, 1], padding="SAME")
            layer = tf.nn.bias_add(layer, bias)
            layer = tf.nn.relu(layer)
            layer = tf.nn.max_pool(
                layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME")

        # [BATCH_SIZE, 32, 32, 8] -> [BATCH_SIZE, 8, 8, 8]
        with tf.variable_scope("conv2"):
            weights = tf.get_variable(
                "weights", [3, 3, 8, 8],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [8], initializer=tf.random_normal_initializer())

            layer = tf.nn.conv2d(
                layer, weights, strides=[1, 1, 1, 1], padding="SAME")
            layer = tf.nn.bias_add(layer, bias)
            layer = tf.nn.relu(layer)
            layer = tf.nn.max_pool(
                layer, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding="SAME")

        # [BATCH_SIZE, 8, 8, 8] -> [BATCH_SIZE, 8 * 8 * 8]
        layer = tf.reshape(layer, [-1, 8 * 8 * 8])

        # [BATCH_SIZE, 8 * 8 * 8] -> [BATCH_SIZE, LABEL_SIZE]
        with tf.variable_scope("output"):
            weights = tf.get_variable(
                "weights", [8 * 8 * 8, LABEL_SIZE],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [LABEL_SIZE], initializer=tf.random_normal_initializer())
            layer = tf.add(tf.matmul(layer, weights), bias)

        return layer

    def inference(inputs, is_train=True):
        """Dispatch to the model selected by FLAGS.model; exit(1) if unknown."""
        if MODEL == "dnn":
            return dnn_inference(inputs, is_train)
        elif MODEL == "lr":
            return lr_inference(inputs, is_train)
        elif MODEL == "wide_and_deep":
            return wide_and_deep_inference(inputs, is_train)
        elif MODEL == "customized":
            return customized_inference(inputs, is_train)
        elif MODEL == "cnn":
            return cnn_inference(inputs, is_train)
        else:
            logging.error("Unknown model, exit now")
            exit(1)

    logging.info("Use the model: {}, model network: {}".format(
        MODEL, FLAGS.model_network))
    logits = inference(batch_features, True)
    batch_labels = tf.to_int64(batch_labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=batch_labels)
    loss = tf.reduce_mean(cross_entropy, name="loss")
    global_step = tf.Variable(0, name="global_step", trainable=False)
    if FLAGS.enable_lr_decay:
        logging.info(
            "Enable learning rate decay rate: {}".format(FLAGS.lr_decay_rate))
        starter_learning_rate = FLAGS.learning_rate
        learning_rate = tf.train.exponential_decay(
            starter_learning_rate,
            global_step,
            100000,
            FLAGS.lr_decay_rate,
            staircase=True)
    else:
        learning_rate = FLAGS.learning_rate
    optimizer = get_optimizer(FLAGS.optimizer, learning_rate)
    train_op = optimizer.minimize(loss, global_step=global_step)
    # Every later call to inference() reuses the variables created above.
    tf.get_variable_scope().reuse_variables()

    # Define accuracy op for train data
    train_accuracy_logits = inference(batch_features, False)
    train_softmax = tf.nn.softmax(train_accuracy_logits)
    train_correct_prediction = tf.equal(
        tf.argmax(train_softmax, 1), batch_labels)
    train_accuracy = tf.reduce_mean(
        tf.cast(train_correct_prediction, tf.float32))

    # Define auc op for train data
    # The labels are expanded to a dense one-hot matrix for streaming_auc.
    batch_labels = tf.cast(batch_labels, tf.int32)
    sparse_labels = tf.reshape(batch_labels, [-1, 1])
    derived_size = tf.shape(batch_labels)[0]
    indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
    concated = tf.concat(axis=1, values=[indices, sparse_labels])
    outshape = tf.stack([derived_size, LABEL_SIZE])
    new_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
    _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax,
                                                    new_batch_labels)

    # Define accuracy op for validate data
    validate_accuracy_logits = inference(validate_batch_features, False)
    validate_softmax = tf.nn.softmax(validate_accuracy_logits)
    validate_batch_labels = tf.to_int64(validate_batch_labels)
    validate_correct_prediction = tf.equal(
        tf.argmax(validate_softmax, 1), validate_batch_labels)
    validate_accuracy = tf.reduce_mean(
        tf.cast(validate_correct_prediction, tf.float32))

    # Define auc op for validate data
    validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
    sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
    derived_size = tf.shape(validate_batch_labels)[0]
    indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
    concated = tf.concat(axis=1, values=[indices, sparse_labels])
    outshape = tf.stack([derived_size, LABEL_SIZE])
    new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
    _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax,
                                                       new_validate_batch_labels)

    # Define inference op
    inference_features = tf.placeholder("float", [None, FEATURE_SIZE])
    inference_logits = inference(inference_features, False)
    inference_softmax = tf.nn.softmax(inference_logits)
    inference_op = tf.argmax(inference_softmax, 1)
    keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
    keys = tf.identity(keys_placeholder)
    model_signature = {
        "inputs":
        exporter.generic_signature({
            "keys": keys_placeholder,
            "features": inference_features
        }),
        "outputs":
        exporter.generic_signature({
            "keys": keys,
            "softmax": inference_softmax,
            "prediction": inference_op
        })
    }

    # Initialize saver and summary
    saver = tf.train.Saver()
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("train_accuracy", train_accuracy)
    tf.summary.scalar("train_auc", train_auc)
    tf.summary.scalar("validate_accuracy", validate_accuracy)
    tf.summary.scalar("validate_auc", validate_auc)
    summary_op = tf.summary.merge_all()
    init_op = [
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ]

    # Create session to run
    with tf.Session() as sess:
        logging.info("Start to run with mode: {}".format(MODE))
        writer = tf.summary.FileWriter(OUTPUT_PATH, sess.graph)
        sess.run(init_op)

        if MODE == "train":
            # Restore session and start queue runner
            restore_session_from_checkpoint(sess, saver, LATEST_CHECKPOINT)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)
            start_time = datetime.datetime.now()

            try:
                while not coord.should_stop():
                    if FLAGS.benchmark_mode:
                        sess.run(train_op)
                    else:
                        _, step = sess.run([train_op, global_step])

                        # Print state while training
                        if step % FLAGS.steps_to_validate == 0:
                            (loss_value, train_accuracy_value, train_auc_value,
                             validate_accuracy_value, validate_auc_value,
                             summary_value) = sess.run([
                                 loss, train_accuracy, train_auc,
                                 validate_accuracy, validate_auc, summary_op
                             ])
                            end_time = datetime.datetime.now()
                            logging.info(
                                "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".
                                format(end_time - start_time, step, loss_value,
                                       train_accuracy_value, train_auc_value,
                                       validate_accuracy_value,
                                       validate_auc_value))
                            writer.add_summary(summary_value, step)
                            saver.save(sess, CHECKPOINT_FILE, global_step=step)
                            start_time = end_time
            except tf.errors.OutOfRangeError:
                # Raised once the input queues have produced all epochs.
                if FLAGS.benchmark_mode:
                    print("Finish training for benchmark")
                    exit(0)
                else:
                    # Export the model after training
                    export_model(sess, saver, model_signature, FLAGS.model_path,
                                 FLAGS.model_version)
            finally:
                coord.request_stop()
            coord.join(threads)

        elif MODE == "export":
            if not restore_session_from_checkpoint(sess, saver,
                                                   LATEST_CHECKPOINT):
                logging.error("No checkpoint found, exit now")
                exit(1)

            # Export the model
            export_model(sess, saver, model_signature, FLAGS.model_path,
                         FLAGS.model_version)

        elif MODE == "savedmodel":
            if not restore_session_from_checkpoint(sess, saver,
                                                   LATEST_CHECKPOINT):
                logging.error("No checkpoint found, exit now")
                exit(1)

            logging.info(
                "Export the saved model to {}".format(FLAGS.saved_model_path))
            export_path_base = FLAGS.saved_model_path
            export_path = os.path.join(
                compat.as_bytes(export_path_base),
                compat.as_bytes(str(FLAGS.model_version)))

            model_signature = signature_def_utils.build_signature_def(
                inputs={
                    "keys": utils.build_tensor_info(keys_placeholder),
                    "features": utils.build_tensor_info(inference_features)
                },
                outputs={
                    "keys": utils.build_tensor_info(keys),
                    "softmax": utils.build_tensor_info(inference_softmax),
                    "prediction": utils.build_tensor_info(inference_op)
                },
                method_name=signature_constants.PREDICT_METHOD_NAME)

            try:
                builder = saved_model_builder.SavedModelBuilder(export_path)
                builder.add_meta_graph_and_variables(
                    sess, [tag_constants.SERVING],
                    clear_devices=True,
                    signature_def_map={
                        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                        model_signature,
                    },
                    legacy_init_op=tf.group(
                        tf.initialize_all_tables(), name="legacy_init_op"))
                builder.save()
            except Exception as e:
                # Export failures are logged rather than raised.
                logging.error("Fail to export saved model, exception: {}".format(e))

        elif MODE == "inference":
            if not restore_session_from_checkpoint(sess, saver,
                                                   LATEST_CHECKPOINT):
                logging.error("No checkpoint found, exit now")
                exit(1)

            # Load inference test data
            inference_result_file_name = FLAGS.inference_result_file
            inference_test_file_name = FLAGS.inference_test_file
            inference_data = np.genfromtxt(inference_test_file_name, delimiter=",")
            # Fixed: the column split was hard-coded to 9 features, which only
            # matched the placeholder when FEATURE_SIZE == 9. The label is the
            # column immediately after the features.
            inference_data_features = inference_data[:, 0:FEATURE_SIZE]
            inference_data_labels = inference_data[:, FEATURE_SIZE]

            # Run inference
            start_time = datetime.datetime.now()
            prediction, prediction_softmax = sess.run(
                [inference_op, inference_softmax],
                feed_dict={inference_features: inference_data_features})
            end_time = datetime.datetime.now()

            # Compute accuracy
            label_number = len(inference_data_labels)
            correct_label_number = 0
            for i in range(label_number):
                if inference_data_labels[i] == prediction[i]:
                    correct_label_number += 1
            accuracy = float(correct_label_number) / label_number

            # Compute auc
            # Column 1 of the softmax is used as the positive-class score.
            y_true = np.array(inference_data_labels)
            y_score = prediction_softmax[:, 1]
            fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
            auc = metrics.auc(fpr, tpr)
            logging.info("[{}] Inference accuracy: {}, auc: {}".format(
                end_time - start_time, accuracy, auc))

            # Save result into the file
            np.savetxt(inference_result_file_name, prediction_softmax, delimiter=",")
            logging.info(
                "Save result to file: {}".format(inference_result_file_name))
def detect_video(Yolo, video_path, output_path, input_size=416, show=False,
                 CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3,
                 iou_threshold=0.45, rectangle_colors=''):
    """Run the detector on every frame of a video and write/show the result.

    Each frame is preprocessed, passed through ``Yolo`` (via ``predict`` when
    ``YOLO_FRAMEWORK == "tf"``, or called directly when it is ``"trt"``),
    post-processed with NMS and annotated with boxes and a timing overlay.
    Timings are averaged over the last 20 frames and printed per frame.

    Args:
        Yolo: Detection model.
        video_path: Path of the video to read.
        output_path: Destination video file; frames are written only when
            this is not the empty string.
        input_size: Side length the frames are preprocessed to.
        show: When true, display each annotated frame; pressing "q" stops.
        CLASSES: Class definition passed to ``draw_bbox``.
        score_threshold: Score threshold passed to ``postprocess_boxes``.
        iou_threshold: IoU threshold passed to ``nms``.
        rectangle_colors: Box colour setting passed to ``draw_bbox``.
    """
    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, video_fps, (width, height))  # output_path must be .mp4

    try:
        while True:
            ret, img = vid.read()
            # End of stream (or a failed read) is signalled by the return
            # flag; this replaces the previous bare `except:` around cvtColor.
            if not ret or img is None:
                break

            # NOTE(review): the same BGR2RGB swap is applied twice, which
            # restores the original channel order. Kept as-is to preserve
            # the detector's input — confirm whether this is intended.
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

            image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            t1 = time.time()
            if YOLO_FRAMEWORK == "tf":
                pred_bbox = Yolo.predict(image_data)
            elif YOLO_FRAMEWORK == "trt":
                batched_input = tf.constant(image_data)
                result = Yolo(batched_input)
                pred_bbox = [value.numpy() for value in result.values()]
            t2 = time.time()

            # Flatten every output to 2-D (keeping the last axis) and stack them.
            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)

            bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
            bboxes = nms(bboxes, iou_threshold, method='nms')

            image = draw_bbox(original_image, bboxes, CLASSES=CLASSES,
                              rectangle_colors=rectangle_colors)

            t3 = time.time()
            times.append(t2 - t1)
            times_2.append(t3 - t1)

            # Average the timings over the 20 most recent frames.
            times = times[-20:]
            times_2 = times_2[-20:]

            ms = sum(times) / len(times) * 1000
            detection_fps = 1000 / ms
            total_fps = 1000 / (sum(times_2) / len(times_2) * 1000)

            image = cv2.putText(image, "Time: {:.1f}FPS".format(detection_fps), (0, 30),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

            print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, detection_fps, total_fps))
            if output_path != '':
                out.write(image)
            if show:
                cv2.imshow('output', image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    break
    finally:
        # Release the capture and writer so the output file is finalised,
        # then close any preview window.
        vid.release()
        out.release()
        cv2.destroyAllWindows()
def multi_modal_network_fp(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """
    An example of a network in tf that has both state and image inputs, with
    the feature point architecture (spatial softmax + expectation).

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters. The keys
            read here are 'obs_include', 'sensor_dims', 'obs_image_data',
            'num_filters' (three entries), 'image_height', 'image_width' and
            'image_channels'.
    Returns:
        A tuple ``(nnet, fc_vars, last_conv_vars)``: the tfMap object that
        stores inputs, outputs, and scalar loss; the concatenated weights and
        biases returned by ``get_mlp_layers``; and the tensor fed into the
        fully connected layers.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    filter_size = 5

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, offset = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        sensor_range = range(offset, offset + dim)
        if sensor in network_config['obs_image_data']:
            img_idx.extend(sensor_range)
        else:
            x_idx.extend(sensor_range)
        offset += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    # Only the last index of each list is used: the state columns are taken
    # as [0, x_idx[-1]] and the image columns as everything after them up to
    # img_idx[-1].
    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 3 convnet layers
    num_filters = network_config['num_filters']

    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    # The flat image is unpacked channel-first and then moved to channel-last.
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    image_input = tf.transpose(image_input, perm=[0, 3, 2, 1])

    # Store layers weight & bias
    with tf.variable_scope('conv_params'):
        weights = {
            # filter_size x filter_size convs chaining
            # num_channels -> num_filters[0] -> num_filters[1] -> num_filters[2]
            'wc1': init_weights([filter_size, filter_size, num_channels, num_filters[0]], name='wc1'),
            'wc2': init_weights([filter_size, filter_size, num_filters[0], num_filters[1]], name='wc2'),
            'wc3': init_weights([filter_size, filter_size, num_filters[1], num_filters[2]], name='wc3'),
        }

        biases = {
            'bc1': init_bias([num_filters[0]], name='bc1'),
            'bc2': init_bias([num_filters[1]], name='bc2'),
            'bc3': init_bias([num_filters[2]], name='bc3'),
        }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'], strides=[1, 2, 2, 1])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_2 = conv2d(img=conv_layer_1, w=weights['wc3'], b=biases['bc3'])

    _, num_rows, num_cols, num_fp = conv_layer_2.get_shape()
    num_rows, num_cols, num_fp = [int(x) for x in [num_rows, num_cols, num_fp]]

    # Coordinate maps: x_map[i, j] = (i - num_rows / 2) / num_rows and
    # y_map[i, j] = (j - num_cols / 2) / num_cols, built by broadcasting
    # instead of a Python double loop (values are identical).
    row_coords = ((np.arange(num_rows) - num_rows / 2.0) / num_rows).astype(np.float32)
    col_coords = ((np.arange(num_cols) - num_cols / 2.0) / num_cols).astype(np.float32)
    x_map = np.repeat(row_coords[:, np.newaxis], num_cols, axis=1)
    y_map = np.repeat(col_coords[np.newaxis, :], num_rows, axis=0)

    x_map = tf.convert_to_tensor(x_map)
    y_map = tf.convert_to_tensor(y_map)

    x_map = tf.reshape(x_map, [num_rows * num_cols])
    y_map = tf.reshape(y_map, [num_rows * num_cols])

    # rearrange features to be [batch_size, num_fp, num_rows, num_cols]
    features = tf.reshape(tf.transpose(conv_layer_2, [0, 3, 1, 2]),
                          [-1, num_rows * num_cols])
    # Spatial softmax over each feature map, then the expected (x, y) position.
    softmax = tf.nn.softmax(features)

    fp_x = tf.reduce_sum(tf.multiply(x_map, softmax), [1], keep_dims=True)
    fp_y = tf.reduce_sum(tf.multiply(y_map, softmax), [1], keep_dims=True)

    fp = tf.reshape(tf.concat(axis=1, values=[fp_x, fp_y]), [-1, num_fp * 2])

    fc_input = tf.concat(axis=1, values=[fp, state_input])

    fc_output, weights_FC, biases_FC = get_mlp_layers(fc_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC

    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision, batch_size=batch_size)
    nnet = TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss], fp=fp)
    last_conv_vars = fc_input

    return nnet, fc_vars, last_conv_vars
# Second convolution stage: conv + ReLU over the (dropped-out) first pooling
# output, followed by 4x4 max pooling.
b_conv2 = bias_variable([12])
h_conv2 = tf.nn.relu(conv2d(h_pool1_drop, W_conv2) + b_conv2)
h_pool2 = max_pool_4x4(h_conv2)
#h_pool2_drop = tf.nn.dropout(h_pool2, keep_prob)

# First fully connected layer: takes the flattened conv output (3*3*12 values)
# concatenated with 49 extra features and produces 30 units.
W_fc1 = weight_variable([3*3*12 + 49, 30])
b_fc1 = bias_variable([30])
h_flat = tf.reshape(h_pool2, [-1,3*3*12])
#h_flat_drop = tf.nn.dropout(h_flat, keep_prob)
#h_flat_sigmoid = tf.nn.sigmoid(h_flat)

# Extra per-example features fed in alongside the conv output.
features = tf.placeholder(tf.float32, [None,49])
h_flat_features = tf.concat([h_flat,features],1)
# NOTE(review): h_flat_features_drop is not used below in this section — the
# matmul reads h_flat_features, so this dropout (fixed 0.8) does not affect
# h_fc1. Confirm whether the dropped-out tensor was meant to be used.
h_flat_features_drop = tf.nn.dropout(h_flat_features, 0.8)
h_fc1 = tf.nn.relu(tf.matmul(h_flat_features, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

#W_fc2 = weight_variable([30, 15])
#b_fc2 = bias_variable([15])
#h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
#h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

# Output layer parameters: 30 units -> 1 output.
W_fc3 = weight_variable([30, 1])
b_fc3 = bias_variable_out([1])
def detect_video_bgs(Yolo, video_path, output_path, log_path, input_size=416,
                     show=False, CLASSES=YOLO_COCO_CLASSES,
                     score_threshold=0.3, iou_threshold=0.45,
                     rectangle_colors='', draw_roi=False, zoom=0,
                     show_diver=True):
    """Detect splashes in a video and log white-pixel statistics per frame.

    Every frame is run through ``Yolo``; an HSV range mask (``cv2.inRange``)
    is computed for the frame, and for frames that contain at least one
    "splash" detection the number of white mask pixels inside the splash
    region is compared with the number in the whole frame. The rendered
    frames are written to ``output_path`` and the per-frame statistics are
    saved as CSV to ``log_path``.

    Args:
        Yolo: Model object exposing ``predict(image_batch)``.
        video_path: Path of the input video.
        output_path: Path of the output video; ``''`` disables frame writing.
        log_path: Destination of the CSV log.
        input_size: Side length the frame is resized to for the detector.
        show: If true, display each frame; pressing ``q`` stops processing.
        CLASSES: Class-name source forwarded to ``draw_bbox``.
        score_threshold: Score threshold forwarded to ``postprocess_boxes``.
        iou_threshold: IoU threshold forwarded to ``nms``.
        rectangle_colors: Forwarded to ``draw_bbox``.
        draw_roi: If true, outline the splash region on the mask image;
            otherwise black out everything outside the region.
        zoom: Forwarded to ``splash_bbox_roi``.
        show_diver: For frames without a splash, draw the detections on the
            original frame when true, otherwise emit a recoloured blank frame.
    """
    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, video_fps,
                          (width, height))  # output_path must be .mp4

    # Lower/upper bounds handed to cv2.inRange on the HSV image.
    LOW = np.array([80, 0, 200])
    HIGH = np.array([255, 110, 255])

    log_columns = [
        "vis_px", "vis_px_pc", "total_px", "total_px_pc", "diff", "diff_pc"
    ]
    # Rows are collected in a list and turned into a DataFrame once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    log_rows = []

    try:
        while True:
            ok, img = vid.read()
            if not ok or img is None:
                # End of stream (or unreadable frame): stop cleanly instead
                # of relying on cvtColor raising on a None image.
                break

            # NOTE(review): the channel swap is applied twice, exactly as in
            # the original code; the net effect is the original channel order.
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

            image_data = image_preprocess(np.copy(original_image),
                                          [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            t1 = time.time()
            pred_bbox = Yolo.predict(image_data)
            t2 = time.time()

            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)

            bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                       score_threshold)
            bboxes = nms(bboxes, iou_threshold, method='nms')

            # Contour BGS: threshold the frame in HSV space.
            hsv = cv2.cvtColor(original_image, cv2.COLOR_BGR2HSV)
            fgMask = cv2.inRange(hsv, LOW, HIGH)

            splash_boxes = [
                i for i in bboxes if CLASS_INDECES[int(i[5])] == "splash"
            ]

            if splash_boxes:
                splash_x_min, splash_y_min, splash_x_max, splash_y_max = splash_bbox_roi(
                    splash_boxes=splash_boxes, zoom=zoom)

                # Whole frame.
                number_of_white_pix = np.sum(fgMask == 255)
                number_total_pix = fgMask.shape[0] * fgMask.shape[1]
                total_px_pc = round(
                    (number_of_white_pix / number_total_pix) * 100, 2)
                print("Normal_image: Number of white pixels: {} ({}%)".format(
                    number_of_white_pix, total_px_pc))

                # Splash region only.
                splash_roi = fgMask[splash_y_min:splash_y_max,
                                    splash_x_min:splash_x_max]
                roi_number_of_white_pix = np.sum(splash_roi == 255)
                vis_px_pc = round(
                    (roi_number_of_white_pix / number_total_pix) * 100, 2)
                print("Roi: Number of white pixels: {} ({}%)".format(
                    roi_number_of_white_pix, vis_px_pc))

                pixel_diff = abs(roi_number_of_white_pix - number_of_white_pix)

                image = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)
                if draw_roi:
                    image = cv2.rectangle(image, (splash_x_min, splash_y_min),
                                          (splash_x_max, splash_y_max),
                                          (255, 0, 0), 2)
                else:
                    # Keep only the pixels inside the splash rectangle.
                    mask = np.zeros(image.shape[:2], dtype="uint8")
                    cv2.rectangle(mask, (splash_x_min, splash_y_min),
                                  (splash_x_max, splash_y_max), 255, -1)
                    image = cv2.bitwise_and(image, image, mask=mask)

                # Recolor
                image = recolor_bw(image, splash_red=True)

                # Share of all white pixels that fall inside the region.
                diff_pc = round(
                    (roi_number_of_white_pix / number_of_white_pix) * 100, 2)

                image = cv2.putText(
                    image,
                    "Vis. PXs (roi): {} ({}%) Total wPXs: {} ({}%) Diff: {} ({}%) "
                    .format(roi_number_of_white_pix, vis_px_pc,
                            number_of_white_pix, total_px_pc, pixel_diff,
                            diff_pc), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7, (0, 0, 255), 1)

                log_rows.append({
                    "vis_px": roi_number_of_white_pix,
                    "vis_px_pc": vis_px_pc,
                    "total_px": number_of_white_pix,
                    "total_px_pc": total_px_pc,
                    "diff": pixel_diff,
                    "diff_pc": diff_pc
                })
            else:
                if not show_diver:
                    # No splash and no diver should be shown.
                    image = np.zeros(original_image.shape[:2], dtype="uint8")
                    image = recolor_bw(image, splash_red=False)
                else:
                    image = draw_bbox(original_image, bboxes, CLASSES=CLASSES,
                                      rectangle_colors=rectangle_colors)

            t3 = time.time()
            times.append(t2 - t1)
            times_2.append(t3 - t1)

            # Moving average over the last 20 frames.
            times = times[-20:]
            times_2 = times_2[-20:]

            ms = sum(times) / len(times) * 1000
            det_fps = 1000 / ms
            fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

            print(
                "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                    ms, det_fps, fps2))
            if output_path != '':
                out.write(image)
            if show:
                cv2.imshow('output', image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cv2.destroyAllWindows()
                    break
    finally:
        # Always hand the capture and writer back so the output file is
        # finalised even when processing fails part-way.
        vid.release()
        out.release()

    pd.DataFrame(log_rows, columns=log_columns).to_csv(log_path)
def _build(self, inputs, order='btu', medium=None,
           sequence_length_major=None, sequence_length_minor=None,
           **kwargs):
    """Encodes the inputs.

    Args:
        inputs: A 4-D tensor of shape `[B, T, U, dim]`, where

            - B: batch_size
            - T: the max length of high-level sequences. E.g., the max
              number of utterances in dialog history.
            - U: the max length of low-level sequences. E.g., the max
              length of each utterance in dialog history.
            - dim: embedding dimension

            The order of first three dimensions can be changed
            according to :attr:`order`.

        order: A 3-char string containing 'b', 't', and 'u',
            that specifies the order of inputs dimensions above.
            Following four can be accepted:

                - **'btu'**: None of the encoders are time-major.
                - **'utb'**: Both encoders are time-major.
                - **'tbu'**: The major encoder is time-major.
                - **'ubt'**: The minor encoder is time-major.

        medium (optional): A list of callables that subsequently process
            the final states of minor encoder and obtain the inputs
            for the major encoder. A single callable (or the string
            ``'flatten'``) is accepted and treated as a one-element list.
            If not specified, :meth:`flatten` is used for processing
            the minor's final states.
        sequence_length_major (optional): The `sequence_length` argument
            sent to major encoder. This is a 1-D Tensor of shape
            `[B]`.
        sequence_length_minor (optional): The `sequence_length` argument
            sent to minor encoder. It can be either a 1-D Tensor of shape
            `[B*T]`, or a 2-D Tensor of shape `[B, T]` or `[T, B]`
            according to :attr:`order`.
        **kwargs: Other keyword arguments for the major and minor
            encoders, such as `initial_state`, etc.
            Note that `sequence_length`, and `time_major`
            must not be included here.
            `time_major` is derived from :attr:`order` automatically.
            By default, arguments will be sent to both major and minor
            encoders. To specify which encoder an argument should be sent
            to, add '_minor'/'_major' as its suffix.

            Note that `initial_state_minor` must have a batch dimension
            of size `B*T`.
            If you have an initial state of batch dimension = `T`, use
            :meth:`tile_initial_state_minor` to tile it according to
            `order`.

    Returns:
        A tuple `(outputs, final_state)` by the major encoder.

        See
        the return values of `_build()` method of respective encoder class
        for details.
    """
    # Local import: `collections.Sequence` was removed in Python 3.10.
    from collections.abc import Sequence

    def _kwargs_split(kwargs):
        """Route kwargs to the minor/major encoder based on their suffix."""
        kwargs_minor, kwargs_major = {}, {}
        for k, v in kwargs.items():
            # The previous check compared the suffix against a *list*
            # (`== ['_minor']`), which is never true for a string, so every
            # keyword argument was silently discarded.
            if k.endswith('_minor'):
                kwargs_minor[k[:-len('_minor')]] = v
            elif k.endswith('_major'):
                kwargs_major[k[:-len('_major')]] = v
            else:
                # Unsuffixed arguments go to both encoders, as documented.
                kwargs_minor[k] = v
                kwargs_major[k] = v
        return kwargs_minor, kwargs_major

    kwargs_minor, kwargs_major = _kwargs_split(kwargs)
    if sequence_length_minor is not None:
        # The minor encoder sees a flattened batch, so flatten its lengths.
        sequence_length_minor = tf.reshape(sequence_length_minor, [-1])
    kwargs_minor['sequence_length'] = sequence_length_minor
    kwargs_major['sequence_length'] = sequence_length_major

    expand, shape = self._get_flatten_order(
        order, kwargs_minor, kwargs_major, tf.shape(inputs))

    inputs = tf.reshape(inputs, shape + [inputs.shape[3]])

    _, states_minor = self._encoder_minor(inputs, **kwargs_minor)

    self.states_minor_before_medium = states_minor

    if medium is None:
        states_minor = self.flatten(states_minor)
    else:
        # A bare string is itself a Sequence; wrap it so that it is treated
        # as one step instead of being iterated character by character.
        if isinstance(medium, str) or not isinstance(medium, Sequence):
            medium = [medium]
        for fn in medium:
            if isinstance(fn, str) and fn == 'flatten':
                states_minor = self.flatten(states_minor)
            else:
                states_minor = fn(states_minor)

    self.states_minor_after_medium = states_minor

    # Restore the leading dimensions that were merged for the minor encoder.
    states_minor = tf.reshape(
        states_minor, tf.concat([expand, tf.shape(states_minor)[1:]], 0))

    outputs_major, states_major = self._encoder_major(states_minor,
                                                      **kwargs_major)

    # Add trainable variables of `self._cell` which may be constructed
    # externally
    if not self._built:
        self._add_trainable_variable(
            self._encoder_minor.trainable_variables)
        self._add_trainable_variable(
            self._encoder_major.trainable_variables)
        self._built = True

    return outputs_major, states_major
def detect_video_knn(Yolo, video_path, output_path, input_size=416,
                     show=False, CLASSES=YOLO_COCO_CLASSES,
                     score_threshold=0.3, iou_threshold=0.45,
                     rectangle_colors='', draw_roi=False, zoom=0):
    """Detect splashes in a video using a KNN background subtractor.

    Every frame is run through ``Yolo`` and through an OpenCV KNN background
    subtractor. For frames with at least one "splash" detection, the white
    foreground pixels inside the splash region are counted, compared with
    the whole frame and rendered onto the mask image; other frames get the
    detections drawn on the original image. Frames are written to
    ``output_path`` unless it is ``''``.

    Args:
        Yolo: Model object exposing ``predict(image_batch)``.
        video_path: Path of the input video.
        output_path: Path of the output video; ``''`` disables frame writing.
        input_size: Side length the frame is resized to for the detector.
        show: If true, display each frame; pressing ``q`` stops processing.
        CLASSES: Class-name source forwarded to ``draw_bbox``.
        score_threshold: Score threshold forwarded to ``postprocess_boxes``.
        iou_threshold: IoU threshold forwarded to ``nms``.
        rectangle_colors: Forwarded to ``draw_bbox``.
        draw_roi: If true, outline the splash region on the mask image;
            otherwise black out everything outside the region.
        zoom: Forwarded to ``splash_bbox_roi``.
    """
    # different background subtraction methods
    # backSub = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=40, detectShadows=False)
    backSub = cv2.createBackgroundSubtractorKNN()

    # KNN
    backSub.setDetectShadows(False)
    backSub.setDist2Threshold(13000)
    backSub.setkNNSamples(6)
    backSub.setNSamples(30)

    times, times_2 = [], []
    vid = cv2.VideoCapture(video_path)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, video_fps,
                          (width, height))  # output_path must be .mp4

    try:
        while True:
            ok, img = vid.read()
            if not ok or img is None:
                # End of stream (or unreadable frame): stop cleanly instead
                # of relying on cvtColor raising on a None image.
                break

            # NOTE(review): the channel swap is applied twice, exactly as in
            # the original code; the net effect is the original channel order.
            original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

            image_data = image_preprocess(np.copy(original_image),
                                          [input_size, input_size])
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            t1 = time.time()
            pred_bbox = Yolo.predict(image_data)
            t2 = time.time()

            pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
            pred_bbox = tf.concat(pred_bbox, axis=0)

            bboxes = postprocess_boxes(pred_bbox, original_image, input_size,
                                       score_threshold)
            bboxes = nms(bboxes, iou_threshold, method='nms')

            fgMask = backSub.apply(original_image, learningRate=0.9)

            splash_boxes = [
                i for i in bboxes if CLASS_INDECES[int(i[5])] == "splash"
            ]

            if splash_boxes:
                splash_x_min, splash_y_min, splash_x_max, splash_y_max = splash_bbox_roi(
                    splash_boxes=splash_boxes, zoom=zoom)

                # Whole frame. The rounding precision now goes to round()
                # instead of being passed to format() as a stray argument.
                number_of_white_pix = np.sum(fgMask == 255)
                number_total_pix = fgMask.shape[0] * fgMask.shape[1]
                total_px_pc = round(
                    (number_of_white_pix / number_total_pix) * 100, 2)
                print("Normal_image: Number of white pixels: {} ({}%)".format(
                    number_of_white_pix, total_px_pc))

                # Splash region only.
                splash_roi = fgMask[splash_y_min:splash_y_max,
                                    splash_x_min:splash_x_max]
                roi_number_of_white_pix = np.sum(splash_roi == 255)
                vis_px_pc = round(
                    (roi_number_of_white_pix / number_total_pix) * 100, 2)
                print("Roi: Number of white pixels: {} ({}%)".format(
                    roi_number_of_white_pix, vis_px_pc))

                pixel_diff = abs(roi_number_of_white_pix - number_of_white_pix)

                image = cv2.cvtColor(fgMask, cv2.COLOR_GRAY2RGB)
                if draw_roi:
                    image = cv2.rectangle(image, (splash_x_min, splash_y_min),
                                          (splash_x_max, splash_y_max),
                                          (255, 0, 0), 2)
                else:
                    # Keep only the pixels inside the splash rectangle.
                    mask = np.zeros(image.shape[:2], dtype="uint8")
                    cv2.rectangle(mask, (splash_x_min, splash_y_min),
                                  (splash_x_max, splash_y_max), 255, -1)
                    image = cv2.bitwise_and(image, image, mask=mask)

                image = cv2.putText(
                    image,
                    "Vis. PXs (roi): {} ({}%) Total wPXs: {} ({}%) Diff: {} ({}%) "
                    .format(
                        roi_number_of_white_pix, vis_px_pc,
                        number_of_white_pix, total_px_pc, pixel_diff,
                        round(
                            (roi_number_of_white_pix / number_of_white_pix) *
                            100, 2)), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.7, (0, 0, 255), 1)
            else:
                # TODO what todo with no splash images ?
                image = draw_bbox(original_image, bboxes, CLASSES=CLASSES,
                                  rectangle_colors=rectangle_colors)

            t3 = time.time()
            times.append(t2 - t1)
            times_2.append(t3 - t1)

            # Moving average over the last 20 frames.
            times = times[-20:]
            times_2 = times_2[-20:]

            ms = sum(times) / len(times) * 1000
            det_fps = 1000 / ms
            fps2 = 1000 / (sum(times_2) / len(times_2) * 1000)

            print(
                "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                    ms, det_fps, fps2))
            if output_path != '':
                out.write(image)
            if show:
                cv2.imshow('output', image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    break
    finally:
        # Always hand the capture and writer back so the output file is
        # finalised, and close any preview window.
        vid.release()
        out.release()
        cv2.destroyAllWindows()
def call(self, w, r, attn_mask, mems, head_mask, output_attentions, training=False):
    """Relative-position multi-head attention over ``w`` (plus optional memory).

    ``w`` is the current segment and ``r`` the relative position input. When
    ``mems`` is given it is concatenated in front of ``w`` along axis 0 to
    form the keys/values, while queries are taken from the last ``qlen``
    positions only.

    Returns a list whose first element is the attention output (with the
    residual connection, layer-normalised unless ``pre_lnorm``), followed by
    the attention probabilities when ``output_attentions`` is true.
    """
    qlen = shape_list(w)[0]
    rlen = shape_list(r)[0]
    bsz = shape_list(w)[1]

    use_memory = mems is not None

    # Keys/values see memory + current segment; without memory only `w`.
    qkv_input = tf.concat([mems, w], 0) if use_memory else w
    if self.pre_lnorm:
        qkv_input = self.layer_norm(qkv_input)
    w_heads = self.qkv_net(qkv_input)
    r_head_k = self.r_net(r)

    w_head_q, w_head_k, w_head_v = tf.split(w_heads, 3, axis=-1)
    if use_memory:
        # Only the current segment issues queries.
        w_head_q = w_head_q[-qlen:]

    klen = shape_list(w_head_k)[0]

    head_dims = (self.n_head, self.d_head)
    w_head_q = tf.reshape(w_head_q, (qlen, bsz) + head_dims)  # qlen x bsz x n_head x d_head
    w_head_k = tf.reshape(w_head_k, (klen, bsz) + head_dims)  # klen x bsz x n_head x d_head
    w_head_v = tf.reshape(w_head_v, (klen, bsz) + head_dims)  # klen x bsz x n_head x d_head
    r_head_k = tf.reshape(r_head_k, (rlen,) + head_dims)      # rlen x n_head x d_head

    # Content-based term.
    content_q = w_head_q + self.r_w_bias
    AC = tf.einsum("ibnd,jbnd->ijbn", content_q, w_head_k)  # qlen x klen x bsz x n_head

    # Position-based term, shifted by _rel_shift.
    position_q = w_head_q + self.r_r_bias
    BD = self._rel_shift(tf.einsum("ibnd,jnd->ijbn", position_q, r_head_k))

    attn_score = (AC + BD) * self.scale

    if attn_mask is not None:
        # Masked positions (mask == 1) are pushed to a very negative score.
        attn_mask_t = attn_mask[:, :, None, None]
        attn_score = attn_score * (1 - attn_mask_t) - 1e30 * attn_mask_t

    # Normalise over the key axis, then apply attention dropout.
    attn_prob = self.dropatt(tf.nn.softmax(attn_score, axis=1), training=training)

    if head_mask is not None:
        attn_prob = attn_prob * head_mask

    attn_vec = tf.einsum("ijbn,jbnd->ibnd", attn_prob, w_head_v)  # qlen x bsz x n_head x d_head

    # Merge the head axes back into a single feature axis.
    vec_shape = shape_list(attn_vec)
    attn_vec = tf.reshape(
        attn_vec, (vec_shape[0], vec_shape[1], self.n_head * self.d_head))

    attn_out = self.drop(self.o_net(attn_vec), training=training)

    residual = w + attn_out
    # Post-norm variant normalises after the residual connection.
    outputs = [residual if self.pre_lnorm else self.layer_norm(residual)]

    if output_attentions:
        outputs.append(attn_prob)

    return outputs
def TFGAN(inputs, targets):
    """Build and train a TF-GAN model that maps fiber outputs to images.

    ``inputs`` is unpacked as ``(fiber_output, fiber_input)`` and ``targets``
    as ``(<unused>, label)``. The generator is fed ``fiber_output``; the real
    data given to the discriminator is ``label`` concatenated with
    ``fiber_input`` on the last axis. The function wipes and recreates the
    training directory, registers image/scalar summaries, builds the
    adversarial + pixel loss, and runs ``tfgan.gan_train``.

    NOTE(review): the training directory is deleted on every call, and the
    generator warm-start checkpoint path is hard-coded to a local drive.
    """
    traindir = os.path.join(logdir, 'GG12\\PIX2PIX_MINMAX_1024')
    if tf.gfile.Exists(traindir):
        tf.gfile.DeleteRecursively(traindir)
    tf.gfile.MakeDirs(traindir)

    # Create a GANModel tuple.
    fiber_output, fiber_input = inputs
    _, label = targets  # first target element is not used here
    real_data = tf.concat((label, fiber_input), -1)

    #######################################################################
    ##########################  GAN MODEL #################################
    #######################################################################
    gan_model = tfgan.gan_model(
        generator_fn=generator_fn,
        discriminator_fn=pix2pix_D,
        real_data=real_data,
        generator_inputs=fiber_output,
        generator_scope='Generator',
        discriminator_scope='Discriminator')

    #######################################################################
    ##########################  GAN SUMMARY ###############################
    #######################################################################
    with tf.name_scope('Train_summary'):
        # The generator output mirrors real_data: two halves on the last axis.
        generated_data, generated_input = tf.split(gan_model.generated_data, 2, -1)
        reshaped_fiber_input = get_summary_image(fiber_input, FLAGS.grid_size)
        reshaped_label = get_summary_image(label, FLAGS.grid_size)
        reshaped_generated_input = get_summary_image(generated_input, FLAGS.grid_size)
        reshaped_generated_data = get_summary_image(generated_data, FLAGS.grid_size)
        tf.summary.image('Input_Fiber', reshaped_fiber_input)
        tf.summary.image('Input_Generator', reshaped_generated_input)
        tf.summary.image('Data_Real', reshaped_label)
        tf.summary.image('Data_Generator', reshaped_generated_data)

    #######################################################################
    ##########################  GAN LOSS  #################################
    #######################################################################
    with tf.name_scope('pixel_loss'):
        pixel_loss = combine_loss(gan_model.generated_data, gan_model.real_data,
                                  add_summary=True)

    with tf.name_scope('gan_loss'):
        gan_loss = tfgan.gan_loss(
            gan_model,
            generator_loss_fn=tfgan.losses.modified_generator_loss,
            discriminator_loss_fn=tfgan.losses.modified_discriminator_loss,
            gradient_penalty_weight=1.0,  # only in wassertein_loss
        )
    tfgan.eval.add_regularization_loss_summaries(gan_model)

    with tf.name_scope('Train_Loss'):
        # Blend the adversarial loss with the pixel loss.
        gan_loss = tfgan.losses.combine_adversarial_loss(
            gan_loss, gan_model, pixel_loss,
            weight_factor=FLAGS.adversarial_loss_weight)

    #######################################################################
    ##########################   GAN OPS   ################################
    #######################################################################
    with tf.name_scope('Train_ops'):
        gen_lr = get_lr(1e-5, decay_steps=5000)
        dis_lr = get_lr(5e-5, decay_steps=5000)
        train_ops = tfgan.gan_train_ops(
            gan_model,
            gan_loss,
            generator_optimizer=get_optimizer(gen_lr),
            discriminator_optimizer=get_optimizer(dis_lr),
            # summarize_gradients=False,
            # colocate_gradients_with_ops=True,
            # transform_grads_fn=tf.contrib.training.clip_gradient_norms_fn(1e3),
            # aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
        )

    # Quality metrics between the generated image half and the label.
    psnr = tf.reduce_mean(tf.image.psnr(generated_data, label, max_val=1.0))
    ssim = tf.reduce_mean(tf.image.ssim(generated_data, label, max_val=1.0))
    corr = correlation(generated_data, label)
    tf.summary.scalar('PSNR', psnr)
    tf.summary.scalar('SSIM', ssim)
    tf.summary.scalar('Relation', corr)
    tf.summary.scalar('generator_lr', gen_lr)
    # tf.summary.scalar('discriminator_lr', dis_lr)

    #######################################################################
    ##########################   GAN TRAIN   ##############################
    #######################################################################
    train_steps = tfgan.GANTrainSteps(generator_train_steps=1,
                                      discriminator_train_steps=1)
    message = tf.string_join([' Train step: ', tf.as_string(tf.train.get_or_create_global_step()),
                              '   PSNR:', tf.as_string(psnr),
                              '   SSIM:', tf.as_string(ssim),
                              '   Correlation:', tf.as_string(corr)
                              ], name='status_message')

    tfgan.gan_train(train_ops,
                    logdir=traindir,
                    get_hooks_fn=tfgan.get_joint_train_hooks(train_steps),
                    hooks=[tf.train.StopAtStepHook(num_steps=FLAGS.max_iter),
                           tf.train.LoggingTensorHook([message], every_n_iter=FLAGS.log_n_steps),
                           # Raw string: same runtime value as before, but no
                           # invalid escape sequences in the literal.
                           get_tfgan_init_fn(r'E:\GitHub\MMFI\log\GG12\CNN', 'Generator'),
                           # get_tfgan_init_fn(r'E:\GitHub\MMFI\log\G2\pix2pix_D', 'Discriminator'),
                           ],
                    save_summaries_steps=FLAGS.save_summaries_steps * 2,
                    save_checkpoint_secs=FLAGS.save_interval_secs)
def encoder(source, params):
    """Build the stacked RNN encoder graph for ``source``.

    The first layer is bidirectional: a forward RNN plus a backward RNN over
    the time-reversed input (or, with ``params.caencoder``, a conditional RNN
    that also receives the reversed forward outputs). Higher layers are
    forward-only. Each layer's output goes through a linear projection and,
    when the widths match, a residual connection, optionally followed by
    layer normalisation.

    Returns a dict with the final layer outputs (``"encodes"``), one initial
    decoder state per decoder layer (``"decoder_initializer"``) derived from
    the last encoder layer's state, and the source ``"mask"``.
    """
    hidden_size = params.hidden_size

    # Non-zero token ids are treated as valid positions.
    mask = tf.to_float(tf.cast(source, tf.bool))
    source, mask = util.remove_invalid_seq(source, mask)

    if params.shared_source_target_embedding:
        embed_name = "embedding"
    else:
        embed_name = "src_embedding"
    src_emb = tf.get_variable(embed_name,
                              [params.src_vocab.size(), params.embed_size])
    src_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.nn.bias_add(tf.gather(src_emb, source), src_bias)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("encoder"):
        x = inputs
        for layer_idx in range(params.num_encoder_layer):
            with tf.variable_scope("layer_{}".format(layer_idx)):
                with tf.variable_scope('forward'):
                    fw_result = rnn.rnn(params.cell, x, hidden_size,
                                        mask=mask,
                                        ln=params.layer_norm,
                                        sm=params.swap_memory,
                                        dp=params.dropout)
                    output_fw, state_fw = fw_result[1]

                if layer_idx != 0:
                    # Upper layers are unidirectional.
                    y, z = output_fw, state_fw
                else:
                    with tf.variable_scope('backward'):
                        if params.caencoder:
                            bw_result = rnn.cond_rnn(params.cell,
                                                     tf.reverse(x, [1]),
                                                     tf.reverse(output_fw, [1]),
                                                     hidden_size,
                                                     mask=tf.reverse(mask, [1]),
                                                     ln=params.layer_norm,
                                                     sm=params.swap_memory,
                                                     num_heads=params.num_heads,
                                                     one2one=True)
                        else:
                            bw_result = rnn.rnn(params.cell,
                                                tf.reverse(x, [1]),
                                                hidden_size,
                                                mask=tf.reverse(mask, [1]),
                                                ln=params.layer_norm,
                                                sm=params.swap_memory,
                                                dp=params.dropout)
                        output_bw, state_bw = bw_result[1]
                        # Undo the time reversal of the backward pass.
                        output_bw = tf.reverse(output_bw, [1])

                    if params.caencoder:
                        y, z = output_bw, state_bw
                    else:
                        y = tf.concat([output_fw, output_bw], -1)
                        z = tf.concat([state_fw, state_bw], -1)

                y = func.linear(y, hidden_size, ln=False, scope="ff")

                # Residual shortcut only when the feature widths agree.
                same_width = x.get_shape()[-1].value == y.get_shape()[-1].value
                x = func.residual_fn(x, y, dropout=params.dropout) if same_width else y

                if params.layer_norm:
                    x = func.layer_norm(x, scope="ln")

    with tf.variable_scope("decoder_initializer"):
        decoder_cell = rnn.get_cell(params.cell, hidden_size,
                                    ln=params.layer_norm)

        init_states = {}
        for dec_layer in range(params.num_decoder_layer):
            scope_name = "layer_{}".format(dec_layer)
            init_states[scope_name] = decoder_cell.get_init_state(
                x=z, scope=scope_name)

        return {
            "encodes": x,
            "decoder_initializer": init_states,
            "mask": mask
        }
def call(
    self,
    inputs,
    mems=None,
    head_mask=None,
    inputs_embeds=None,
    output_attentions=None,
    output_hidden_states=None,
    training=False,
):
    """Run the Transformer-XL stack.

    ``inputs`` may be a tensor of input ids, a list/tuple laid out as
    ``[input_ids, mems, head_mask, inputs_embeds, output_attentions,
    output_hidden_states]`` (trailing items optional), or a dict /
    ``BatchEncoding`` with the same names as keys.

    Returns a list: last hidden state ``[bsz, len, hidden_dim]``, the new
    memories, then optionally all hidden states and all attentions.

    Raises:
        ValueError: if both or neither of ``input_ids`` and
            ``inputs_embeds`` are provided.
        NotImplementedError: if ``head_mask`` is given or
            ``self.attn_type`` is not 0.
    """
    if isinstance(inputs, (tuple, list)):
        input_ids = inputs[0]
        mems = inputs[1] if len(inputs) > 1 else mems
        head_mask = inputs[2] if len(inputs) > 2 else head_mask
        inputs_embeds = inputs[3] if len(inputs) > 3 else inputs_embeds
        output_attentions = inputs[4] if len(inputs) > 4 else output_attentions
        # Index 5 exists only when there are more than 5 items; the previous
        # `> 4` guard raised IndexError for a 5-element input.
        output_hidden_states = inputs[5] if len(inputs) > 5 else output_hidden_states
        assert len(inputs) <= 6, "Too many inputs."
    elif isinstance(inputs, (dict, BatchEncoding)):
        input_ids = inputs.get("input_ids")
        mems = inputs.get("mems", mems)
        head_mask = inputs.get("head_mask", head_mask)
        inputs_embeds = inputs.get("inputs_embeds", inputs_embeds)
        output_attentions = inputs.get("output_attentions", output_attentions)
        output_hidden_states = inputs.get("output_hidden_states", output_hidden_states)
        assert len(inputs) <= 6, "Too many inputs."
    else:
        input_ids = inputs

    # Fall back to the layer-level defaults when the caller did not decide.
    output_attentions = output_attentions if output_attentions is not None else self.output_attentions
    output_hidden_states = output_hidden_states if output_hidden_states is not None else self.output_hidden_states

    # the original code for Transformer-XL used shapes [len, bsz] but we want a unified interface in the library
    # so we transpose here from shape [bsz, len] to shape [len, bsz]
    if input_ids is not None and inputs_embeds is not None:
        raise ValueError(
            "You cannot specify both input_ids and inputs_embeds at the same time"
        )
    elif input_ids is not None:
        input_ids = tf.transpose(input_ids, perm=(1, 0))
        qlen, bsz = shape_list(input_ids)
    elif inputs_embeds is not None:
        inputs_embeds = tf.transpose(inputs_embeds, perm=(1, 0, 2))
        qlen, bsz = shape_list(inputs_embeds)[:2]
    else:
        raise ValueError(
            "You have to specify either input_ids or inputs_embeds")

    if mems is None:
        mems = self.init_mems(bsz)

    # Prepare head mask if needed
    # 1.0 in head_mask indicate we keep the head
    # attention_probs has shape bsz x n_heads x N x N
    # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
    # and head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
    if head_mask is not None:
        raise NotImplementedError
    else:
        head_mask = [None] * self.n_layer

    if inputs_embeds is not None:
        word_emb = inputs_embeds
    else:
        word_emb = self.word_emb(input_ids)

    mlen = shape_list(mems[0])[0] if mems is not None else 0
    klen = mlen + qlen

    # Causal mask: 1 marks a key position that must NOT be attended to.
    # Memory columns (the first mlen) are always visible.
    attn_mask = tf.ones([qlen, qlen])
    mask_u = tf.linalg.band_part(attn_mask, 0, -1)
    mask_dia = tf.linalg.band_part(attn_mask, 0, 0)
    attn_mask_pad = tf.zeros([qlen, mlen])
    dec_attn_mask = tf.concat([attn_mask_pad, mask_u - mask_dia], 1)
    if self.same_length:
        mask_l = tf.linalg.band_part(attn_mask, -1, 0)
        dec_attn_mask = tf.concat([
            dec_attn_mask[:, :qlen] + mask_l - mask_dia,
            dec_attn_mask[:, qlen:]
        ], 1)
    # ::: PyTorch masking code for reference :::
    # if self.same_length:
    #     all_ones = word_emb.new_ones((qlen, klen), dtype=torch.uint8)
    #     mask_len = klen - self.mem_len
    #     if mask_len > 0:
    #         mask_shift_len = qlen - mask_len
    #     else:
    #         mask_shift_len = qlen
    #     dec_attn_mask = (torch.triu(all_ones, 1+mlen)
    #             + torch.tril(all_ones, -mask_shift_len))[:, :, None] # -1
    # else:
    #     dec_attn_mask = torch.triu(
    #         word_emb.new_ones((qlen, klen), dtype=torch.uint8), diagonal=1+mlen)[:,:,None]

    hids = []
    attentions = []
    if self.attn_type == 0:  # default
        # Relative positions klen-1 ... 0, optionally clamped.
        pos_seq = tf.range(klen - 1, -1, -1.0)
        if self.clamp_len > 0:
            pos_seq = tf.minimum(pos_seq, self.clamp_len)
        pos_emb = self.pos_emb(pos_seq)

        core_out = self.drop(word_emb, training=training)
        pos_emb = self.drop(pos_emb, training=training)

        for i, layer in enumerate(self.layers):
            # Record each layer's *input*; these feed the memory update.
            hids.append(core_out)
            mems_i = None if mems is None else mems[i]
            layer_outputs = layer(
                core_out,
                pos_emb,
                dec_attn_mask,
                mems_i,
                head_mask[i],
                output_attentions,
                training=training,
            )
            core_out = layer_outputs[0]
            if output_attentions:
                attentions.append(layer_outputs[1])
    else:  # learnable embeddings and absolute embeddings
        raise NotImplementedError  # Removed these to avoid maintaining dead code - They are not used in our pretrained checkpoint

    core_out = self.drop(core_out, training=training)

    new_mems = self._update_mems(hids, mems, mlen, qlen)

    # We transpose back here to shape [bsz, len, hidden_dim]
    outputs = [tf.transpose(core_out, perm=(1, 0, 2)), new_mems]
    if output_hidden_states:
        # Add last layer and transpose to library standard shape [bsz, len, hidden_dim]
        hids.append(core_out)
        hids = [tf.transpose(t, perm=(1, 0, 2)) for t in hids]
        outputs.append(hids)
    if output_attentions:
        # Transpose to library standard shape [bsz, n_heads, query_seq_len, key_seq_len]
        attentions = [tf.transpose(t, perm=(2, 3, 0, 1)) for t in attentions]
        outputs.append(attentions)

    return outputs  # last hidden state, new_mems, (all hidden states), (all attentions)
def __init__(self, constants_dictionary, data_dictionary):
    """Build the training graph of an LSTM encoder-decoder model.

    Args:
        constants_dictionary: Mapping that provides ``'TARGET_VOCAB_SIZE'``
            and ``'NUM_UNITS'``.
        data_dictionary: Mapping that provides the encoder/decoder input id
            tensors (``'encoder_train_inp'``, ``'decoder_train_inp'``), the
            embedding matrices (``'encoder_embedding'``,
            ``'decoder_embedding'``), the sequence lengths
            (``'encoder_sequence_length'``, ``'decoder_sequence_length'``)
            and the target labels (``'dec_train_labels'``).

    Sets ``self.prediction_output`` (arg-max token ids), ``self.loss_batch``
    (per-position cross entropy) and ``self.loss`` (its mean).

    NOTE(review): the loss is averaged over every position without a
    sequence-length mask — confirm that padded steps are meant to count.
    """
    self.encoder_train_inp = data_dictionary['encoder_train_inp']
    self.source_embedding = data_dictionary['encoder_embedding']
    self.encoder_sequence_length = data_dictionary['encoder_sequence_length']
    encoder_emb_inp = tf.nn.embedding_lookup(self.source_embedding,
                                             self.encoder_train_inp)

    self.decoder_train_inp = data_dictionary['decoder_train_inp']
    self.target_embedding = data_dictionary['decoder_embedding']
    self.decoder_sequence_length = data_dictionary['decoder_sequence_length']
    decoder_emb_inp = tf.nn.embedding_lookup(self.target_embedding,
                                             self.decoder_train_inp)

    self.dec_train_labels = data_dictionary['dec_train_labels']
    target_train_one_hot = tf.one_hot(self.dec_train_labels,
                                      constants_dictionary['TARGET_VOCAB_SIZE'],
                                      on_value=1.0, off_value=0.0)

    # The time-major transposes and the zero initial encoder state built
    # here previously were never consumed by the graph and were removed.

    # Projects decoder outputs onto the target vocabulary.
    projection_layer = tf.layers.Dense(constants_dictionary['TARGET_VOCAB_SIZE'],
                                       use_bias=False)

    with tf.variable_scope('encoder'):
        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(constants_dictionary['NUM_UNITS'])
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, encoder_emb_inp,
            sequence_length=self.encoder_sequence_length,
            dtype=tf.float32)

    with tf.variable_scope('decoder'):
        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(constants_dictionary['NUM_UNITS'])
        # Teacher forcing: the decoder is fed the ground-truth inputs.
        helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inp,
                                                   self.decoder_sequence_length)
        # The decoder starts from the encoder's final state.
        decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper,
                                                  encoder_state,
                                                  output_layer=projection_layer)
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)

    logits = outputs.rnn_output
    self.prediction_output = tf.argmax(tf.nn.softmax(logits), axis=2)
    # The labels were previously wrapped in tf.concat over a single tensor,
    # which is an identity operation; the one-hot tensor is passed directly.
    self.loss_batch = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=target_train_one_hot)
    self.loss = tf.reduce_mean(self.loss_batch)
def get_slice(data, idx, parts):
    """Return the ``idx``-th of ``parts`` equal chunks of ``data`` along axis 0.

    Args:
        data: Tensor to split along its first dimension.
        idx: Zero-based index of the chunk to return.
        parts: Number of chunks the first dimension is divided into.

    Returns:
        ``data[idx * n:(idx + 1) * n]`` with ``n = shape[0] // parts``; all
        other dimensions are kept whole. When the first dimension is not a
        multiple of ``parts`` the trailing remainder rows belong to no chunk.
    """
    shape = tf.shape(data)
    # Floor division keeps the integer dtype that tf.slice requires; `/`
    # would produce floats under Python 3. tf.concat takes (values, axis),
    # matching every other tf.concat call in this file.
    chunk = shape[:1] // parts
    size = tf.concat([chunk, shape[1:]], axis=0)
    # Only the first dimension advances between chunks.
    stride = tf.concat([chunk, shape[1:] * 0], axis=0)
    start = stride * idx
    return tf.slice(data, start, size)
def create_generator(generator_inputs, generator_outputs_channels):
    """Build an 8-level encoder/decoder generator with skip connections.

    Encoder levels 2-7 use ``gen_conv_dilate``; levels 1 and 8 use
    ``gen_conv``. Every decoder level except the first concatenates the
    mirrored encoder activation on the channel axis before deconvolving.
    The final level applies ``tanh`` and its output is returned. Layer
    shapes are printed while the graph is built.

    NOTE(review): the resolution halving/doubling implied by the layer names
    depends on gen_conv / gen_conv_dilate / gen_deconv, which are defined
    elsewhere.
    """
    stack = []

    print('encoder:')
    print(generator_inputs.shape)

    # encoder_1: plain convolution, no activation or batchnorm in front.
    with tf.variable_scope("encoder_1"):
        first = gen_conv(generator_inputs, a.ngf)
        stack.append(first)
    print(first.shape)

    # Output channels of encoder_2 .. encoder_8.
    encoder_channels = [
        a.ngf * 2,
        a.ngf * 4,
        a.ngf * 8,
        a.ngf * 8,
        a.ngf * 8,
        a.ngf * 8,
        a.ngf * 8,
    ]
    dilated_channels = encoder_channels[:6]
    bottleneck_channels = encoder_channels[6]

    # encoder_2 .. encoder_7: leaky ReLU -> dilated conv -> batchnorm.
    for channels in dilated_channels:
        with tf.variable_scope("encoder_%d" % (len(stack) + 1)):
            activated = lrelu(stack[-1], 0.2)
            conv_out = gen_conv_dilate(activated, channels)
            print(conv_out.shape)
            stack.append(batchnorm(conv_out))

    # encoder_8: same pattern with the regular convolution.
    with tf.variable_scope("encoder_%d" % (len(stack) + 1)):
        activated = lrelu(stack[-1], 0.2)
        conv_out = gen_conv(activated, bottleneck_channels)
        print(conv_out.shape)
        stack.append(batchnorm(conv_out))

    print('decoder:')
    # (output channels, dropout rate) for decoder_8 .. decoder_2.
    decoder_specs = [
        (a.ngf * 8, 0.5),
        (a.ngf * 8, 0.5),
        (a.ngf * 8, 0.5),
        (a.ngf * 8, 0.0),
        (a.ngf * 4, 0.0),
        (a.ngf * 2, 0.0),
        (a.ngf, 0.0),
    ]

    encoder_depth = len(stack)
    for step, (channels, drop_rate) in enumerate(decoder_specs):
        mirror = encoder_depth - step - 1
        with tf.variable_scope("decoder_%d" % (mirror + 1)):
            if step == 0:
                # The innermost decoder level sits directly on the
                # bottleneck, so there is nothing extra to concatenate.
                decoder_in = stack[-1]
            else:
                decoder_in = tf.concat([stack[-1], stack[mirror]], axis=3)

            up = gen_deconv(tf.nn.relu(decoder_in), channels)
            up = batchnorm(up)

            if drop_rate > 0.0:
                up = tf.nn.dropout(up, keep_prob=1 - drop_rate)

            stack.append(up)
        print(up.shape)

    # decoder_1: final upsampling to the requested channel count + tanh.
    with tf.variable_scope("decoder_1"):
        decoder_in = tf.concat([stack[-1], stack[0]], axis=3)
        final = gen_deconv(tf.nn.relu(decoder_in), generator_outputs_channels)
        final = tf.tanh(final)
        stack.append(final)
    print(final.shape)

    return stack[-1]
def decoder(target, state, params):
    """Run the stacked recurrent decoder and compute the training loss.

    Args:
        target: integer tensor of target token ids.  The mask is 1.0 where an
            id is non-zero and 0.0 elsewhere.
        state: dict read (and partly updated) by the decoder.  Keys used:
            "encodes", "mask" and "decoder_initializer"; when "decoder" is
            present, ``state["decoder"]["state"]`` supplies the initial
            per-layer states and is overwritten with the new ones; when
            "dev_decode" is present only the last time step is scored.
        params: hyper-parameter object (``hidden_size``, ``embed_size``,
            ``num_decoder_layer``, ``dropout``, ...).

    Returns:
        A ``(loss, logits, state)`` tuple.
    """
    mask = tf.to_float(tf.cast(target, tf.bool))
    hidden_size = params.hidden_size
    has_decoder_state = 'decoder' in state

    if not has_decoder_state:
        target, mask = util.remove_invalid_seq(target, mask)

    if params.shared_source_target_embedding:
        embed_name = "embedding"
    else:
        embed_name = "tgt_embedding"
    tgt_emb = tf.get_variable(
        embed_name, [params.tgt_vocab.size(), params.embed_size])
    tgt_bias = tf.get_variable("bias", [params.embed_size])

    embedded = tf.gather(tgt_emb, target)
    embedded = tf.nn.bias_add(embedded, tgt_bias)

    if not has_decoder_state:
        # Shift right by one step: prepend a zero vector on the time axis
        # and drop the final position.
        inputs = tf.pad(embedded, [[0, 0], [1, 0], [0, 0]])
        inputs = inputs[:, :-1, :]
    else:
        # An all-padding target yields an all-zero input.
        inputs = tf.cond(
            tf.reduce_all(tf.equal(target, params.tgt_vocab.pad())),
            lambda: tf.zeros_like(embedded),
            lambda: embedded)
        mask = tf.ones_like(mask)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("decoder"):
        x = inputs
        for layer in range(params.num_decoder_layer):
            layer_key = "layer_{}".format(layer)
            with tf.variable_scope(layer_key):
                init_state = state["decoder_initializer"][layer_key]
                if has_decoder_state:
                    init_state = state["decoder"]["state"][layer_key]

                if layer == 0 or params.use_deep_att:
                    # Attend over the encoder outputs; the returned
                    # contexts are kept in `c` for later layers.
                    returns = rnn.cond_rnn(
                        params.cell, x, state["encodes"], hidden_size,
                        init_state=init_state, mask=mask,
                        num_heads=params.num_heads, mem_mask=state["mask"],
                        ln=params.layer_norm, sm=params.swap_memory,
                        one2one=False, dp=params.dropout)
                    (_, hidden_state), (outputs, _), c, _ = returns
                elif params.caencoder:
                    # Condition on the previously stored contexts; `c`
                    # itself is left untouched.
                    returns = rnn.cond_rnn(
                        params.cell, x, c, hidden_size,
                        init_state=init_state, mask=mask, mem_mask=mask,
                        ln=params.layer_norm, sm=params.swap_memory,
                        num_heads=params.num_heads, one2one=True,
                        dp=params.dropout)
                    (_, hidden_state), (outputs, _), _, _ = returns
                else:
                    rnn_result = rnn.rnn(
                        params.cell, tf.concat([x, c], -1), hidden_size,
                        mask=mask, init_state=init_state,
                        ln=params.layer_norm, sm=params.swap_memory,
                        dp=params.dropout)
                    outputs, hidden_state = rnn_result[1]

                if has_decoder_state:
                    state['decoder']['state'][layer_key] = hidden_state

                y = func.linear(outputs, hidden_size, ln=False, scope="ff")

                # Residual shortcut only when the last dimensions agree.
                if x.get_shape()[-1].value == y.get_shape()[-1].value:
                    x = func.residual_fn(x, y, dropout=params.dropout)
                else:
                    x = y
                if params.layer_norm:
                    x = func.layer_norm(x, scope="ln")

    feature = func.linear(tf.concat([x, c], -1), params.embed_size,
                          ln=params.layer_norm, scope="ff")
    feature = tf.nn.tanh(feature)

    if util.valid_dropout(params.dropout):
        feature = tf.nn.dropout(feature, 1. - params.dropout)

    if 'dev_decode' in state:
        # NOTE(review): this replaces the projected feature with the last
        # step of `x`, discarding the linear/tanh result above - confirm
        # that `feature[:, -1, :]` was not intended.
        feature = x[:, -1, :]

    if params.shared_target_softmax_embedding:
        softmax_name = "tgt_embedding"
    else:
        softmax_name = "softmax_embedding"
    if params.shared_source_target_embedding:
        softmax_name = "embedding"
    softmax_emb = tf.get_variable(
        softmax_name, [params.tgt_vocab.size(), params.embed_size])

    feature = tf.reshape(feature, [-1, params.embed_size])
    logits = tf.matmul(feature, softmax_emb,
                       transpose_a=False, transpose_b=True)

    soft_label, normalizer = util.label_smooth(
        target, util.shape_list(logits)[-1], factor=params.label_smooth)
    centropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=soft_label)
    centropy = centropy - normalizer
    centropy = tf.reshape(centropy, tf.shape(target))

    # Mask-weighted mean per sequence, then mean over the batch.
    per_sequence = tf.reduce_sum(centropy * mask, -1) / tf.reduce_sum(mask, -1)
    mean_loss = tf.reduce_mean(per_sequence)

    # Guard for zero-sized batches (shapes such as [0, 1]).
    loss = tf.cond(tf.equal(tf.shape(target)[0], 0),
                   lambda: tf.constant(0, dtype=tf.float32),
                   lambda: mean_loss)

    return loss, logits, state
def comp2re(self, x):
    """Stack index 0 and index 1 of the second axis of ``x`` along axis 0.

    Presumably index 0 / 1 hold the real / imaginary parts (going by the
    method name) - confirm against the caller.
    """
    first_part = x[:, 0]
    second_part = x[:, 1]
    return tf.concat((first_part, second_part), axis=0)
def create_model_MT(inputs, targets_1, targets_2):
    """Assemble the two-target GAN training graph.

    ``targets_1`` and ``targets_2`` are concatenated along axis 3 and treated
    as a single target; the generator output is split back into its first
    three channels (``outputs_1``) and the remaining ones (``outputs_2``).

    The generator objective is ``gen_loss_GAN * a.gan_weight + gen_loss_L1 *
    a.l1_weight``.  The dice, jaccard and Tversky terms are computed and
    tracked by the moving average but are not part of that objective.

    Args:
        inputs: generator / discriminator conditioning tensor.
        targets_1: first target tensor.
        targets_2: second target tensor.

    Returns:
        A ``Model`` holding the discriminator predictions, the moving
        averages of the losses, both gradient lists, the two output slices
        and the grouped ``train`` op.
    """

    def create_discriminator(discrim_inputs, discrim_targets):
        """Return the sigmoid output of the five-stage discriminator."""
        n_layers = 3
        print('discriminator:')

        # 2x [batch, height, width, in_channels] => [batch, height, width, in_channels * 2]
        paired = tf.concat([discrim_inputs, discrim_targets], axis=3)
        print(paired.shape)

        # layer_1: [batch, 256, 256, in_channels * 2] => [batch, 128, 128, ndf]
        with tf.variable_scope("layer_1"):
            convolved = discrim_conv(paired, a.ndf, stride=2)
            features = lrelu(convolved, 0.2)
            print(convolved.shape)

        # layer_2: [batch, 128, 128, ndf] => [batch, 64, 64, ndf * 2]
        # layer_3: [batch, 64, 64, ndf * 2] => [batch, 32, 32, ndf * 4]
        # layer_4: [batch, 32, 32, ndf * 4] => [batch, 31, 31, ndf * 8]
        for i in range(n_layers):
            with tf.variable_scope("layer_%d" % (i + 2)):
                width = a.ndf * min(2 ** (i + 1), 8)
                # Only the last stage of this loop keeps the resolution.
                stride = 1 if i == n_layers - 1 else 2
                convolved = discrim_conv(features, width, stride=stride)
                normalized = batchnorm(convolved)
                features = lrelu(normalized, 0.2)
                print(convolved.shape)

        # layer_5: [batch, 31, 31, ndf * 8] => [batch, 30, 30, 1]
        with tf.variable_scope("layer_%d" % (n_layers + 2)):
            convolved = discrim_conv(features, out_channels=1, stride=1)
            verdict = tf.sigmoid(convolved)
            print(verdict.shape)

        return verdict

    targets = tf.concat([targets_1, targets_2], axis=3)

    with tf.variable_scope("generator"):
        out_channels = int(targets.get_shape()[-1])
        outputs = create_generator(inputs, out_channels)

    # Two discriminator copies sharing the same variables: one scores the
    # real pair, the other the generated pair.
    # 2x [batch, height, width, channels] => [batch, 30, 30, 1]
    with tf.name_scope("real_discriminator"), \
            tf.variable_scope("discriminator"):
        predict_real = create_discriminator(inputs, targets)

    with tf.name_scope("fake_discriminator"), \
            tf.variable_scope("discriminator", reuse=True):
        predict_fake = create_discriminator(inputs, outputs)

    with tf.name_scope("discriminator_loss"):
        # Minimising -log pushes predict_real towards 1 and predict_fake
        # towards 0.
        discrim_loss = tf.reduce_mean(
            -(tf.log(predict_real + EPS) + tf.log(1 - predict_fake + EPS)))
        discrim_loss_real = tf.reduce_mean(-(tf.log(predict_real + EPS)))
        discrim_loss_fake = tf.reduce_mean(-(tf.log(1 - predict_fake + EPS)))

    with tf.name_scope("generator_loss"):
        # predict_fake => 1, abs(targets - outputs) => 0
        gen_loss_GAN = tf.reduce_mean(-tf.log(predict_fake + EPS))
        gen_loss_L1 = tf.reduce_mean(tf.abs(targets - outputs))
        gen_loss_dice = 1 - dice_coe(outputs, targets, loss_type='sorensen')
        gen_loss_jaccard = 1 - dice_coe(outputs, targets, loss_type='jaccard')
        gen_loss_Tversky = tf.abs(1 - tversky_loss(targets, outputs))
        gen_loss = gen_loss_GAN * a.gan_weight + gen_loss_L1 * a.l1_weight

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [v for v in tf.trainable_variables()
                         if v.name.startswith("discriminator")]
        discrim_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
        discrim_grads_and_vars = discrim_optim.compute_gradients(
            discrim_loss, var_list=discrim_tvars)
        discrim_train = discrim_optim.apply_gradients(discrim_grads_and_vars)

    with tf.name_scope("generator_train"):
        # The generator step runs only after the discriminator step.
        with tf.control_dependencies([discrim_train]):
            gen_tvars = [v for v in tf.trainable_variables()
                         if v.name.startswith("generator")]
            gen_optim = tf.train.AdamOptimizer(a.lr, a.beta1)
            gen_grads_and_vars = gen_optim.compute_gradients(
                gen_loss, var_list=gen_tvars)
            gen_train = gen_optim.apply_gradients(gen_grads_and_vars)

    ema = tf.train.ExponentialMovingAverage(decay=0.99)
    tracked_losses = [
        discrim_loss, gen_loss_GAN, gen_loss_L1, gen_loss,
        discrim_loss_real, discrim_loss_fake,
        gen_loss_jaccard, gen_loss_dice, gen_loss_Tversky,
    ]
    update_losses = ema.apply(tracked_losses)

    # Split the generator output back into its two targets.
    outputs_1 = outputs[:, :, :, :3]
    outputs_2 = outputs[:, :, :, 3:]

    global_step = tf.train.get_or_create_global_step()
    incr_global_step = tf.assign(global_step, global_step + 1)

    return Model(
        predict_real=predict_real,
        predict_fake=predict_fake,
        discrim_loss=ema.average(discrim_loss),
        discrim_grads_and_vars=discrim_grads_and_vars,
        gen_loss_GAN=ema.average(gen_loss_GAN),
        gen_loss_L1=ema.average(gen_loss_L1),
        gen_grads_and_vars=gen_grads_and_vars,
        train=tf.group(update_losses, incr_global_step, gen_train),
        outputs_1=outputs_1,
        outputs_2=outputs_2,
        gen_loss=ema.average(gen_loss),
        discrim_loss_fake=ema.average(discrim_loss_fake),
        discrim_loss_real=ema.average(discrim_loss_real),
        gen_loss_jaccard=ema.average(gen_loss_jaccard),
        gen_loss_dice=ema.average(gen_loss_dice),
        gen_loss_Tversky=ema.average(gen_loss_Tversky),
    )