def connector_capsule_mat(input_tensor, position_grid, input_activation, input_dim, output_dim, layer_name, num_routing=3, num_in_atoms=3, num_out_atoms=3, leaky=False, final_beta=1.0, min_var=0.0005): """Final Capsule Layer with Pose Matrices and Shared connections.""" # One weight tensor for each capsule of the layer bellow: w: [8*128, 8*10] with tf.variable_scope(layer_name): # This Variable will hold the state of the weights for the layer with tf.name_scope('input_center_connector'): utils.activation_summary(input_tensor) weights = utils.weight_variable( [input_dim, num_out_atoms, output_dim * num_out_atoms], stddev=0.01) # weights = tf.clip_by_norm(weights, 1.0, axes=[1]) activation_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1], init_value=1.0, name='activation_biases') sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1], init_value=2.0, name='sigma_biases') with tf.name_scope('Wx_plus_b'): # input_tensor: [x, 128, 8, h, w] input_shape = tf.shape(input_tensor) input_trans = tf.transpose(input_tensor, [1, 0, 3, 4, 2]) input_share = tf.reshape(input_trans, [input_dim, -1, num_in_atoms]) # input_expanded: [x, 128, 8, 1] wx_share = tf.matmul(input_share, weights) # sqr_num_out_atoms = num_out_atoms num_out_atoms *= num_out_atoms wx_trans = tf.reshape(wx_share, [ input_dim, input_shape[0], input_shape[3], input_shape[4], num_out_atoms, output_dim ]) wx_trans.set_shape( (input_dim, None, input_tensor.get_shape()[3], input_tensor.get_shape()[4], num_out_atoms, output_dim)) h, w, _ = position_grid.get_shape() height = h width = w # t_pose = tf.transpose(position_grid, [2, 0, 1]) # t_pose_exp = tf.scatter_nd([[sqr_num_out_atoms -1], # [2 * sqr_num_out_atoms - 1]], t_pose, [num_out_atoms, height, width]) # pose_g_exp = tf.transpose(t_pose_exp, [1, 2, 0]) zero_grid = tf.zeros([height, width, num_out_atoms - 2]) pose_g_exp = tf.concat([position_grid, zero_grid], axis=2) pose_g = tf.expand_dims( tf.expand_dims(tf.expand_dims(pose_g_exp, -1), 0), 0) wx_posed = wx_trans + pose_g wx_posed_t = tf.transpose(wx_posed, [1, 0, 2, 3, 5, 4]) # Wx_reshaped: [x, 128, 10, 8] wx = tf.reshape(wx_posed_t, [ -1, input_dim * height * width, output_dim, num_out_atoms, 1, 1 ]) with tf.name_scope('routing'): # Routing # logits: [x, 128, 10] logit_shape = [input_dim * height * width, output_dim, 1, 1, 1] for _ in range(4): input_activation = tf.expand_dims(input_activation, axis=-1) activation, center = update_em_routing( wx=wx, input_activation=input_activation, activation_biases=activation_biases, sigma_biases=sigma_biases, logit_shape=logit_shape, num_out_atoms=num_out_atoms, num_routing=num_routing, output_dim=output_dim, leaky=leaky, final_beta=final_beta / 4, min_var=min_var, ) out_activation = tf.squeeze(activation, axis=[1, 3, 4, 5]) out_center = tf.squeeze(center, axis=[1, 4, 5]) return tf.sigmoid(out_activation), out_center
import tensorflow.compat.v1 as tf
import numpy as np

tf.disable_v2_behavior()

xy = np.loadtxt('data/data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([8, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis using the sigmoid function; yields a real number between 0 and 1.
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)

# Cost function with log terms (binary cross-entropy).
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# True if hypothesis > 0.5, otherwise False; casting to float32 gives 1.0 or 0.0.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
# Compare the predictions with Y and use the match count to compute the accuracy.
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
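    # Hedged continuation (assumption, not in the original snippet): the usual
    # training loop and final accuracy report, mirroring the other
    # logistic-regression examples collected in this file.
    for step in range(10001):
        _, cost_val = sess.run([train, cost], feed_dict={X: x_data, Y: y_data})
        if step % 1000 == 0:
            print(step, cost_val)

    h, p, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print('\nHypothesis:', h, '\nPredicted:', p, '\nAccuracy:', a)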
def conv_capsule_mat_fast( input_tensor, input_activation, input_dim, output_dim, layer_name, num_routing=3, num_in_atoms=3, num_out_atoms=3, stride=2, kernel_size=5, min_var=0.0005, final_beta=1.0, ): """Convolutional Capsule layer with fast EM routing. Args: input_tensor: The input capsule features. input_activation: The input capsule activations. input_dim: Number of input capsule types. output_dim: Number of output capsule types. layer_name: Name of this layer, e.g. conv_capsule1 num_routing: Number of routing iterations. num_in_atoms: Number of features in each of the input capsules. num_out_atoms: Number of features in each of the output capsules. stride: Stride of the convolution. kernel_size: kernel size of the convolution. min_var: Minimum varience for each capsule to avoid NaNs. final_beta: beta for making the routing factors sharp. Returns: The final capsule center and activations. """ tf.logging.info('conv_capsule_mat %s', layer_name) tf.logging.info('input_shape %s', input_tensor.shape.as_list()) in_atom_sq = num_in_atoms * num_in_atoms with tf.variable_scope(layer_name): # This should be fully defined... # input_shape = tf.shape(input_tensor) input_shape = input_tensor.shape.as_list() batch, _, _, in_height, in_width = input_shape o_height = (in_height - kernel_size) // stride + 1 o_width = (in_width - kernel_size) // stride + 1 # This Variable will hold the state of the weights for the layer. kernel = utils.weight_variable(shape=[ input_dim, kernel_size, kernel_size, num_in_atoms, output_dim * num_out_atoms ], stddev=0.1) activation_biases = utils.bias_variable( [1, 1, output_dim, 1, 1, 1, 1, 1], init_value=0.2, name='activation_biases') sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1], init_value=.5, name='sigma_biases') with utils.maybe_jit_scope(), tf.name_scope('conv'): input_tensor_reshaped = tf.reshape( input_tensor, [batch * input_dim * in_atom_sq, in_height, in_width, 1]) input_act_reshaped = tf.reshape( input_activation, [batch * input_dim, in_height, in_width, 1]) conv_patches = utils.kernel_tile(input_tensor_reshaped, kernel_size, stride) act_patches = utils.kernel_tile(input_act_reshaped, kernel_size, stride) patches = tf.reshape(conv_patches, (batch, input_dim, in_atom_sq, o_height, o_width, kernel_size, kernel_size)) patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2]) patch_split = tf.reshape( patch_trans, (input_dim, kernel_size, kernel_size, batch * o_height * o_width * num_in_atoms, num_in_atoms), name='patch_split') a_patches = tf.reshape(act_patches, (batch, input_dim, 1, 1, o_height, o_width, kernel_size, kernel_size), name='a_patches') # Recompute Wx on backprop to save memory (perhaps redo patches as well?) 
# @tf.contrib.layers.recompute_grad def compute_wx(patch_split, kernel, is_recomputing=False): tf.logging.info('compute_wx(is_recomputing=%s)', is_recomputing) with utils.maybe_jit_scope(), tf.name_scope('wx'): wx = tf.matmul(patch_split, kernel) wx = tf.reshape( wx, (input_dim, kernel_size, kernel_size, batch, o_height, o_width, num_in_atoms * num_out_atoms, output_dim)) wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2]) return wx wx = compute_wx(patch_split, kernel.value()) with utils.maybe_jit_scope(): # Routing logit_shape = [ input_dim, output_dim, 1, o_height, o_width, kernel_size, kernel_size ] tf.logging.info('logit_shape: %s', logit_shape) activation, center = update_conv_routing_fast( wx=wx, input_activation=a_patches, activation_biases=activation_biases, sigma_biases=sigma_biases, logit_shape=logit_shape, num_out_atoms=num_out_atoms * num_out_atoms, input_dim=input_dim, num_routing=num_routing, output_dim=output_dim, min_var=min_var, final_beta=4 * final_beta, stride=stride, layer_name=layer_name, ) with utils.maybe_jit_scope(): out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7], name='out_activation') out_center = tf.squeeze(center, axis=[1, 6, 7], name='out_center') out_activation = tf.sigmoid(out_activation) with tf.name_scope('center'): utils.activation_summary(out_center) return out_activation, out_center
def model_fn(features, labels, mode): """Creates the prediction, loss, and train ops. Args: features: A dictionary of tensors keyed by the feature name. labels: A dictionary of label tensors keyed by the label key. mode: The execution mode, as defined in tf.contrib.learn.ModeKeys. Returns: EstimatorSpec with the mode, prediction, loss, train_op and output_alternatives a dictionary specifying the output for a servo request during serving. """ # 1. Construct input to RNN sequence_feature_map = { k: features[input_fn.SEQUENCE_KEY_PREFIX + k] for k in hparams.sequence_features } sequence_length = tf.squeeze( features[input_fn.CONTEXT_KEY_PREFIX + 'sequenceLength'], axis=1, name='sq_seq_len') tf.summary.scalar('sequence_length', tf.reduce_mean(sequence_length)) diff_delta_time, obs_values, indicator = construct_input( sequence_feature_map, hparams.categorical_values, hparams.categorical_seq_feature, hparams.feature_value, mode, hparams.normalize, hparams.momentum, hparams.min_value, hparams.max_value, hparams.input_keep_prob) seq_mask = tf.expand_dims( tf.sequence_mask(sequence_length, dtype=tf.float32), axis=2) logits, weights = construct_logits( diff_delta_time, obs_values, indicator, sequence_length, seq_mask, hparams, reuse=False) all_attribution_dict = {} if mode == tf.estimator.ModeKeys.TRAIN: if hparams.sequence_prediction: assert not hparams.use_rnn_attention # If we train a sequence_prediction we repeat the labels over time. label_tensor = labels[hparams.label_key] labels[hparams.label_key] = tf.tile( tf.expand_dims(label_tensor, 2), multiples=[1, tf.shape(logits)[1], 1]) if hparams.volatility_loss_factor > 0.0: volatility = tf.reduce_sum( tf.square(seq_mask * compute_prediction_diff_attribution(logits))) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, volatility * hparams.volatility_loss_factor) elif not hparams.use_rnn_attention: logits = rnn_common.select_last_activations( logits, tf.to_int32(sequence_length)) else: if hparams.sequence_prediction: last_logits = rnn_common.select_last_activations( logits, tf.to_int32(sequence_length)) else: last_logits = logits if mode == tf.estimator.ModeKeys.PREDICT: delta_time = sequence_feature_map['deltaTime'] all_attributions = {} if hparams.include_gradients_attribution: all_attributions['gradient_last'] = compute_gradient_attribution( last_logits, obs_values, indicator) if hparams.include_gradients_sum_time_attribution: assert not hparams.use_rnn_attention all_attributions['gradient_sum'] = compute_gradient_attribution( _predictions_for_gradients( logits, seq_mask, delta_time, hparams.attribution_max_delta_time, averaged=False), obs_values, indicator) if hparams.include_gradients_avg_time_attribution: assert not hparams.use_rnn_attention all_attributions['gradient_avg'] = compute_gradient_attribution( _predictions_for_gradients( logits, seq_mask, delta_time, hparams.attribution_max_delta_time, averaged=True), obs_values, indicator) if hparams.include_path_integrated_gradients_attribution: all_attributions['integrated_gradient'] = ( compute_path_integrated_gradient_attribution( obs_values, indicator, diff_delta_time, delta_time, sequence_length, seq_mask, hparams)) if hparams.use_rnn_attention: all_attributions['rnn_attention'] = weights if hparams.include_diff_sequence_prediction_attribution: all_attributions['diff_sequence'] = ( compute_prediction_diff_attribution(logits)) all_attribution_dict = {} for attribution_name, attribution in all_attributions.items(): attribution_dict = convert_attribution( attribution, 
sequence_feature_map, seq_mask, delta_time, hparams.attribution_threshold, hparams.attribution_max_delta_time, prefix=attribution_name + '-') all_attribution_dict.update(attribution_dict) if hparams.include_sequence_prediction: # Add the predictions at each time step to the attention dictionary. attribution_indices = tf.where(seq_mask > 0.5) all_attribution_dict['predictions'] = tf.sparse.expand_dims( tf.SparseTensor( indices=attribution_indices, values=tf.gather_nd( tf.sigmoid(logits), attribution_indices), dense_shape=tf.to_int64(tf.shape(delta_time))), axis=1) # At test/inference time we only make a single prediction even if we did # sequence_prediction during training. logits = last_logits seq_mask = None probabilities = tf.sigmoid(logits) classes = probabilities > 0.5 predictions = { PredictionKeys.LOGITS: logits, PredictionKeys.PROBABILITIES: probabilities, PredictionKeys.CLASSES: classes } # Calculate the loss for TRAIN and EVAL, but not PREDICT. if mode == tf.estimator.ModeKeys.PREDICT: loss = None else: loss = tf.nn.sigmoid_cross_entropy_with_logits( labels=labels[hparams.label_key], logits=predictions[PredictionKeys.LOGITS]) if hparams.sequence_prediction: loss *= seq_mask loss = tf.reduce_mean(loss) regularization_losses = tf.losses.get_regularization_losses() if regularization_losses: tf.summary.scalar('loss/prior_regularization', loss) regularization_loss = tf.add_n(regularization_losses) tf.summary.scalar('loss/regularization_loss', regularization_loss) loss += regularization_loss tf.summary.scalar('loss', loss) train_op = None if mode == tf.estimator.ModeKeys.TRAIN: optimizer = tf.train.AdamOptimizer( learning_rate=hparams.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8) optimizer = contrib_estimator.clip_gradients_by_norm(optimizer, 6.0) train_op = contrib_training.create_train_op( total_loss=loss, optimizer=optimizer, summarize_gradients=False) if mode != tf.estimator.ModeKeys.TRAIN: for k, v in all_attribution_dict.items(): if not isinstance(v, tf.SparseTensor): raise ValueError('Expect attributions to be in SparseTensor, ' 'getting %s for feature %s' % (v.__class__.__name__, k)) predictions['attention_attribution,%s,indices' % k] = v.indices predictions['attention_attribution,%s,values' % k] = v.values predictions['attention_attribution,%s,shape' % k] = v.dense_shape eval_metric_ops = {} if mode == tf.estimator.ModeKeys.EVAL: auc = tf.metrics.auc prob_k = PredictionKeys.PROBABILITIES class_k = PredictionKeys.CLASSES m = 'careful_interpolation' metric_fn_dict = { 'auc-roc': lambda l, p: auc(l, p[prob_k], curve='ROC', summation_method=m), 'auc-pr': lambda l, p: auc(l, p[prob_k], curve='PR', summation_method=m), 'accuracy': lambda l, p: tf.metrics.accuracy(l, p[class_k]), } for (k, f) in metric_fn_dict.items(): eval_metric_ops[k] = f(label_tensor, predictions) # Define the output for serving. export_outputs = {} if mode == tf.estimator.ModeKeys.PREDICT: export_outputs = { 'mortality': tf.estimator.export.PredictOutput(predictions) } return tf.estimator.EstimatorSpec( mode=mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs)
def get_estimator_spec(hparams, mode, features, labels, frame_logits, onset_logits, offset_logits, velocity_values, offset_network=True): """Create TPUEstimatorSpec.""" loss_metrics = {} loss = None if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): onset_losses = tf.losses.sigmoid_cross_entropy( labels.onsets[:, :, :constants.MIDI_PITCHES], onset_logits[:, :, :constants.MIDI_PITCHES], weights=tf.expand_dims(tf.sequence_mask(features.length, maxlen=tf.shape( labels.onsets)[1]), axis=2)) loss_metrics['onset'] = onset_losses if offset_network and not hparams.drums_only: offset_losses = tf.losses.sigmoid_cross_entropy( labels.offsets[:, :, :constants.MIDI_PITCHES], offset_logits[:, :, :constants.MIDI_PITCHES], weights=tf.expand_dims(tf.sequence_mask( features.length, maxlen=tf.shape(labels.offsets)[1]), axis=2)) loss_metrics['offset'] = offset_losses velocity_losses = tf.losses.mean_squared_error( labels.velocities, velocity_values, weights=labels.onsets * hparams.velocity_loss_weight) loss_metrics['velocity'] = velocity_losses if not hparams.drums_only: frame_losses = tf.losses.sigmoid_cross_entropy( labels.labels[:, :, :constants.MIDI_PITCHES], frame_logits[:, :, :constants.MIDI_PITCHES], weights=tf.expand_dims(tf.sequence_mask(features.length, maxlen=tf.shape( labels.labels)[1]), axis=2)) loss_metrics['frame'] = frame_losses loss = tf.losses.get_total_loss() if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): frame_probs = tf.sigmoid(frame_logits) onset_probs = tf.sigmoid(onset_logits) if offset_network: offset_probs = tf.sigmoid(offset_logits) else: offset_probs = tf.zeros_like(onset_probs) frame_predictions = frame_probs > hparams.predict_frame_threshold onset_predictions = onset_probs > hparams.predict_onset_threshold offset_predictions = offset_probs > hparams.predict_offset_threshold if hparams.drum_prediction_map: map_predictions = functools.partial( drum_mappings.map_pianoroll, mapping_name=hparams.drum_prediction_map, reduce_mode='any', min_pitch=constants.MIN_MIDI_PITCH) frame_predictions = tf.map_fn(map_predictions, frame_predictions) onset_predictions = tf.map_fn(map_predictions, onset_predictions) offset_predictions = tf.map_fn(map_predictions, offset_predictions) map_values = functools.partial( drum_mappings.map_pianoroll, mapping_name=hparams.drum_prediction_map, reduce_mode='max', min_pitch=constants.MIN_MIDI_PITCH) velocity_values = tf.map_fn(map_values, velocity_values) metrics_values = get_metrics(features, labels, frame_probs, onset_probs, frame_predictions, onset_predictions, offset_predictions, velocity_values, hparams) for label, loss_collection in loss_metrics.items(): loss_label = 'losses/' + label metrics_values[loss_label] = loss_collection if mode == tf.estimator.ModeKeys.TRAIN: train_op = tf_slim.optimize_loss( name='training', loss=loss, global_step=tf.train.get_or_create_global_step(), learning_rate=hparams.learning_rate, learning_rate_decay_fn=functools.partial( tf.train.exponential_decay, decay_steps=hparams.decay_steps, decay_rate=hparams.decay_rate, staircase=True), clip_gradients=hparams.clip_norm, summaries=[], optimizer=lambda lr: tf.tpu.CrossShardOptimizer( tf.train.AdamOptimizer(lr))) return tf.tpu.estimator.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op) elif mode == tf.estimator.ModeKeys.EVAL: metric_ops = {k: tf.metrics.mean(v) for k, v in metrics_values.items()} return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=metric_ops) elif mode == tf.estimator.ModeKeys.PREDICT: predictions 
= { 'frame_probs': frame_probs, 'onset_probs': onset_probs, 'frame_predictions': frame_predictions, 'onset_predictions': onset_predictions, 'offset_predictions': offset_predictions, 'velocity_values': velocity_values, 'sequence_predictions': _predict_sequences(frame_probs=frame_probs, onset_probs=onset_probs, frame_predictions=frame_predictions, onset_predictions=onset_predictions, offset_predictions=offset_predictions, velocity_values=velocity_values, hparams=hparams), # Include some features and labels in output because Estimator 'predict' # API does not give access to them. 'sequence_ids': features.sequence_id, 'sequence_labels': labels.note_sequence, 'frame_labels': labels.labels, 'onset_labels': labels.onsets, } for k, v in metrics_values.items(): predictions[k] = tf.stack(v) return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) else: raise ValueError('Unsupported mode: %s' % mode)
def __call__(self, x, state, timestep=0, scope=None): with tf.variable_scope(scope or type(self).__name__): total_h, total_c = tf.split(state, 2, 1) h = total_h[:, 0:self.num_units] c = total_c[:, 0:self.num_units] self.hyper_state = tf.concat( [total_h[:, self.num_units:], total_c[:, self.num_units:]], 1) batch_size = x.get_shape().as_list()[0] x_size = x.get_shape().as_list()[1] self._input_size = x_size w_init = None # uniform h_init = lstm_ortho_initializer(1.0) w_xh = tf.get_variable( 'W_xh', [x_size, 4 * self.num_units], initializer=w_init) w_hh = tf.get_variable( 'W_hh', [self.num_units, 4 * self.num_units], initializer=h_init) bias = tf.get_variable( 'bias', [4 * self.num_units], initializer=tf.constant_initializer(0.0)) # concatenate the input and hidden states for hyperlstm input hyper_input = tf.concat([x, h], 1) hyper_output, hyper_new_state = self.hyper_cell(hyper_input, self.hyper_state) self.hyper_output = hyper_output self.hyper_state = hyper_new_state xh = tf.matmul(x, w_xh) hh = tf.matmul(h, w_hh) # split Wxh contributions ix, jx, fx, ox = tf.split(xh, 4, 1) ix = self.hyper_norm(ix, 'hyper_ix', use_bias=False) jx = self.hyper_norm(jx, 'hyper_jx', use_bias=False) fx = self.hyper_norm(fx, 'hyper_fx', use_bias=False) ox = self.hyper_norm(ox, 'hyper_ox', use_bias=False) # split Whh contributions ih, jh, fh, oh = tf.split(hh, 4, 1) ih = self.hyper_norm(ih, 'hyper_ih', use_bias=True) jh = self.hyper_norm(jh, 'hyper_jh', use_bias=True) fh = self.hyper_norm(fh, 'hyper_fh', use_bias=True) oh = self.hyper_norm(oh, 'hyper_oh', use_bias=True) # split bias ib, jb, fb, ob = tf.split(bias, 4, 0) # bias is to be broadcasted. # i = input_gate, j = new_input, f = forget_gate, o = output_gate i = ix + ih + ib j = jx + jh + jb f = fx + fh + fb o = ox + oh + ob if self.use_layer_norm: concat = tf.concat([i, j, f, o], 1) concat = layer_norm_all(concat, batch_size, 4, self.num_units, 'ln_all') i, j, f, o = tf.split(concat, 4, 1) if self.use_recurrent_dropout: g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob) else: g = tf.tanh(j) new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g new_h = tf.tanh(layer_norm(new_c, self.num_units, 'ln_c')) * tf.sigmoid(o) hyper_h, hyper_c = tf.split(hyper_new_state, 2, 1) new_total_h = tf.concat([new_h, hyper_h], 1) new_total_c = tf.concat([new_c, hyper_c], 1) new_total_state = tf.concat([new_total_h, new_total_c], 1) return new_h, new_total_state
def build(self, inputs, is_training, rescale_inputs=True, include_decoder=True, use_reduce_mean_to_pool=False): """Build the graph for this configuration. Args: inputs: A dict of inputs. For training, should contain 'wav'. is_training: Whether we are training or not. Not used in this config. rescale_inputs: Whether to convert inputs to mu-law and back to unit scaling before passing through the model (loses gradients). include_decoder: bool, whether to include the decoder in the build(). use_reduce_mean_to_pool: whether to use reduce_mean (instead of pool1d) for pooling. Returns: A dict of outputs that includes the 'predictions', 'loss', the 'encoding', the 'quantized_input', and whatever metrics we want to track for eval. """ num_stages = 10 num_layers = 30 filter_length = 3 width = 512 skip_width = 256 ae_num_stages = 10 ae_num_layers = 30 ae_filter_length = 3 ae_width = 128 # Encode the source with 8-bit Mu-Law. x = inputs['wav'] x_quantized = utils.mu_law(x) x_scaled = tf.cast(x_quantized, tf.float32) / 128.0 x_scaled = tf.expand_dims(x_scaled, 2) x = tf.expand_dims(x, 2) ### # The Non-Causal Temporal Encoder. ### en = masked.conv1d(x_scaled if rescale_inputs else x, causal=False, num_filters=ae_width, filter_length=ae_filter_length, name='ae_startconv', is_training=is_training) for num_layer in range(ae_num_layers): dilation = 2**(num_layer % ae_num_stages) d = tf.nn.relu(en) d = masked.conv1d(d, causal=False, num_filters=ae_width, filter_length=ae_filter_length, dilation=dilation, name='ae_dilatedconv_%d' % (num_layer + 1), is_training=is_training) d = tf.nn.relu(d) en += masked.conv1d(d, num_filters=ae_width, filter_length=1, name='ae_res_%d' % (num_layer + 1), is_training=is_training) en = masked.conv1d(en, num_filters=self.ae_bottleneck_width, filter_length=1, name='ae_bottleneck', is_training=is_training) if use_reduce_mean_to_pool: # Depending on the accelerator used for training, masked.pool1d may # lead to out of memory error. # reduce_mean is equivalent to masked.pool1d when the stride is the same # as the window length (which is the case here). batch_size, unused_length, depth = en.shape.as_list() en = tf.reshape(en, [batch_size, -1, self.ae_hop_length, depth]) en = tf.reduce_mean(en, axis=2) else: en = masked.pool1d(en, self.ae_hop_length, name='ae_pool', mode='avg') encoding = en if not include_decoder: return {'encoding': encoding} ### # The WaveNet Decoder. ### l = masked.shift_right(x_scaled if rescale_inputs else x) l = masked.conv1d(l, num_filters=width, filter_length=filter_length, name='startconv', is_training=is_training) # Set up skip connections. s = masked.conv1d(l, num_filters=skip_width, filter_length=1, name='skip_start', is_training=is_training) # Residual blocks with skip connections. 
for i in range(num_layers): dilation = 2**(i % num_stages) d = masked.conv1d(l, num_filters=2 * width, filter_length=filter_length, dilation=dilation, name='dilatedconv_%d' % (i + 1), is_training=is_training) d = self._condition( d, masked.conv1d(en, num_filters=2 * width, filter_length=1, name='cond_map_%d' % (i + 1), is_training=is_training)) assert d.get_shape().as_list()[2] % 2 == 0 m = d.get_shape().as_list()[2] // 2 d_sigmoid = tf.sigmoid(d[:, :, :m]) d_tanh = tf.tanh(d[:, :, m:]) d = d_sigmoid * d_tanh l += masked.conv1d(d, num_filters=width, filter_length=1, name='res_%d' % (i + 1), is_training=is_training) s += masked.conv1d(d, num_filters=skip_width, filter_length=1, name='skip_%d' % (i + 1), is_training=is_training) s = tf.nn.relu(s) s = masked.conv1d(s, num_filters=skip_width, filter_length=1, name='out1', is_training=is_training) s = self._condition( s, masked.conv1d(en, num_filters=skip_width, filter_length=1, name='cond_map_out1', is_training=is_training)) s = tf.nn.relu(s) ### # Compute the logits and get the loss. ### logits = masked.conv1d(s, num_filters=256, filter_length=1, name='logits', is_training=is_training) logits = tf.reshape(logits, [-1, 256]) probs = tf.nn.softmax(logits, name='softmax') x_indices = tf.cast(tf.reshape(x_quantized, [-1]), tf.int32) + 128 loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=x_indices, name='nll'), 0, name='loss') return { 'predictions': probs, 'loss': loss, 'eval': { 'nll': loss }, 'quantized_input': x_quantized, 'encoding': encoding, }
def __call__(self, input_, state, scope=None): """Run one step of RHN. All tensor arguments are shaped [batch_size, *]. Args: input_: A tensor. state: An TiledRHNStateTuple. scope: VariableScope for the created subgraph; defaults to `TiledRHNCell`. Returns: A tuple containing: - A `2-D, [batch, num_units]`, Tensor representing the output of the RHN after one time step (which consists of `depth` number of computational steps). - An TiledRHNStateTuple tuple of Tensors representing the new state of the RHN after one time step. Raises: ValueError: If input size cannot be inferred from `input_` via static shape inference. """ num_units = self._num_units def maybe_transform(transform, x): if transform is None: return x else: return transform(x) # Apply transformations to the input and the recurrent state. transformed_input = maybe_transform(self._input_transform, input_) # Let's figure out what the outputs are. output_name_and_sizes = [ # This is the proposed update (usually 'j' in an LSTM). ('h', num_units), # Called 'carry' gate in the paper. This pretty much plays the # part of the forget gate of an LSTM. ('c', num_units) ] if not self._tie_gates: # Called 'transform' gate, this is like the input gate of an # LSTM. output_name_and_sizes.append(('t', num_units)) with tf.variable_scope(scope or type(self).__name__, initializer=self._initializer): s = state.s for level in six.moves.range(self._depth): with tf.variable_scope('layer{}'.format(level)): transformed_s = maybe_transform(self._state_transform, s) if level == 0: inputs = [transformed_input, transformed_s] input_name_and_sizes = [ ('x', transformed_input.get_shape().with_rank(2)[1]), # This is the raw cell state. Unlike in an LSTM this # is not passed through any non-linearity. ('s', num_units) ] else: inputs = [transformed_s] input_name_and_sizes = [('s', num_units)] if self._tiled_linear_mods[level] is None: self._tiled_linear_mods[ level] = self._tiled_linear_class( input_name_and_sizes, output_name_and_sizes, self._tiled_linear_var_init_params) if self._tie_gates: h_pre, c_pre = self._tiled_linear_mods[level](inputs) else: h_pre, c_pre, t_pre = self._tiled_linear_mods[level]( inputs) # Compute the cell state s. c = tf.sigmoid(c_pre) h = self._activation(h_pre) h = maybe_transform(self._update_transform, h) if self._tie_gates: t = 1 - c else: t = tf.sigmoid(t_pre) s = c * s + t * h if self._cell_clip is not None: # pylint: disable=invalid-unary-operand-type s = tf.clip_by_value(s, -self._cell_clip, self._cell_clip) # pylint: enable=invalid-unary-operand-type return s, TiledRHNStateTuple(s)
def AdjMatrixAccuracy(logits, labels):
    predictions = tf.cast(tf.greater(tf.sigmoid(logits), .5), tf.float64)
    accuracies = tf.cast(tf.equal(predictions, labels), tf.float64)
    return tf.reduce_mean(accuracies)  # Report accuracy per edge
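# Hedged usage sketch (standalone example, not from the source): evaluate
# AdjMatrixAccuracy on a dummy 2x2 adjacency matrix in a TF1 session.
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

example_logits = tf.constant(np.array([[2.0, -1.5], [0.3, -0.2]]))
example_labels = tf.constant(np.array([[1.0, 0.0], [1.0, 1.0]]))
example_accuracy = AdjMatrixAccuracy(example_logits, example_labels)
with tf.Session() as sess:
    print(sess.run(example_accuracy))  # 0.75: three of the four edges are predicted correctly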
def __call__(self, inputs, state, scope=None): """Run this RNN cell on inputs, starting from the given state. Args: inputs: `2-D` tensor with shape `[batch_size, input_size]`. state: `2-D Tensor` with shape `[batch_size, self.state_size]`. scope: optional cell scope. Returns: A pair containing: - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`. - New state: A single `2-D` tensor. """ batch_size, hidden_size = inputs.shape fixed_arc = self._params.fixed_arc num_layers = len(fixed_arc) // 2 prev_s = self.prev_s w_prev = self.w_prev w_skip = self.w_skip input_mask = self._input_mask layer_mask = self._layer_mask if layer_mask is not None: assert input_mask is not None ht = tf.matmul( tf.concat([inputs * input_mask, state * layer_mask], axis=1), w_prev) else: ht = tf.matmul(tf.concat([inputs, state], axis=1), w_prev) h, t = tf.split(ht, 2, axis=1) h = tf.tanh(h) t = tf.sigmoid(t) s = state + t * (h - state) layers = [s] def _select_function(h, function_id): if function_id == 0: return tf.tanh(h) elif function_id == 1: return tf.nn.relu(h) elif function_id == 2: return tf.sigmoid(h) elif function_id == 3: return h raise ValueError('Unknown func_idx {0}'.format(function_id)) start_idx = 0 used = np.zeros(num_layers + 1, dtype=np.float32) for layer_id in range(num_layers): prev_idx = fixed_arc[start_idx] func_idx = fixed_arc[start_idx + 1] prev_s = layers[prev_idx] used[prev_idx] = 1 if layer_mask is not None: ht = tf.matmul(prev_s * layer_mask, w_skip[layer_id]) else: ht = tf.matmul(prev_s, w_skip[layer_id]) h, t = tf.split(ht, 2, axis=1) h = _select_function(h, func_idx) t = tf.sigmoid(t) s = prev_s + t * (h - prev_s) s.set_shape([batch_size, hidden_size]) layers.append(s) start_idx += 2 if self._params.average_loose_ends: layers = [l for l, u in zip(layers, used) if u == 0] next_s = tf.add_n(layers) / np.sum(1. - used) else: next_s = tf.add_n(layers[1:]) / tf.cast(num_layers, dtype=tf.float32) return next_s, next_s
def build_network(image, layers, variables): def _weights(layer_name): return variables["MobilenetV1/" + layer_name + "/weights"]['x'] def _biases(layer_name): return variables["MobilenetV1/" + layer_name + "/biases"]['x'] def _depthwise_weights(layer_name): return variables["MobilenetV1/" + layer_name + "/depthwise_weights"]['x'] def _conv_to_output(mobile_net_output, output_layer_name): w = tf.nn.conv2d(mobile_net_output, _weights(output_layer_name), [1, 1, 1, 1], padding='SAME') w = tf.nn.bias_add(w, _biases(output_layer_name), name=output_layer_name) return w def _conv(inputs, stride, block_id): return tf.nn.relu6( tf.nn.conv2d(inputs, _weights("Conv2d_" + str(block_id)), stride, padding='SAME') + _biases("Conv2d_" + str(block_id))) def _separable_conv(inputs, stride, block_id, dilations): if dilations is None: dilations = [1, 1] dw_layer = "Conv2d_" + str(block_id) + "_depthwise" pw_layer = "Conv2d_" + str(block_id) + "_pointwise" w = tf.nn.depthwise_conv2d(inputs, _depthwise_weights(dw_layer), stride, 'SAME', rate=dilations, data_format='NHWC') w = tf.nn.bias_add(w, _biases(dw_layer)) w = tf.nn.relu6(w) w = tf.nn.conv2d(w, _weights(pw_layer), [1, 1, 1, 1], padding='SAME') w = tf.nn.bias_add(w, _biases(pw_layer)) w = tf.nn.relu6(w) return w x = image buff = [] with tf.variable_scope(None, 'MobilenetV1'): for m in layers: stride = [1, m['stride'], m['stride'], 1] rate = [m['rate'], m['rate']] if m['convType'] == "conv2d": x = _conv(x, stride, m['blockId']) buff.append(x) elif m['convType'] == "separableConv": x = _separable_conv(x, stride, m['blockId'], rate) buff.append(x) heatmaps = _conv_to_output(x, 'heatmap_2') offsets = _conv_to_output(x, 'offset_2') displacement_fwd = _conv_to_output(x, 'displacement_fwd_2') displacement_bwd = _conv_to_output(x, 'displacement_bwd_2') heatmaps = tf.sigmoid(heatmaps, 'heatmap') return heatmaps, offsets, displacement_fwd, displacement_bwd
def build_model(spec, length, hparams, is_training): """Builds a raw, API-independent onsets & frames.""" if hparams.stop_activation_gradient and not hparams.activation_loss: raise ValueError( 'If stop_activation_gradient is true, activation_loss must be true.' ) with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with tf.variable_scope('onsets'): onset_outputs = acoustic_model(spec, hparams, lstm_units=hparams.onset_lstm_units, lengths=length, is_training=is_training) onset_logits = slim.fully_connected(onset_outputs, constants.MIDI_PITCHES, activation_fn=None, scope='onset_logits') offset_logits = [] if hparams.offset_network: with tf.variable_scope('offsets'): offset_outputs = acoustic_model( spec, hparams, lstm_units=hparams.offset_lstm_units, lengths=length, is_training=is_training) offset_logits = slim.fully_connected(offset_outputs, constants.MIDI_PITCHES, activation_fn=None, scope='offset_logits') with tf.variable_scope('velocity'): velocity_outputs = acoustic_model( spec, hparams, lstm_units=hparams.velocity_lstm_units, lengths=length, is_training=is_training) velocity_values = slim.fully_connected(velocity_outputs, constants.MIDI_PITCHES, activation_fn=None, scope='onset_velocities') with tf.variable_scope('frame'): if not hparams.share_conv_features: # TODO(eriche): this is broken when hparams.frame_lstm_units > 0 activation_outputs = acoustic_model( spec, hparams, lstm_units=hparams.frame_lstm_units, lengths=length, is_training=is_training) activation_logits = slim.fully_connected( activation_outputs, constants.MIDI_PITCHES, activation_fn=None, scope='activation_logits') else: activation_logits = slim.fully_connected( onset_outputs, constants.MIDI_PITCHES, activation_fn=None, scope='activation_logits') logits = [] if hparams.stop_onset_gradient: logits.append(tf.stop_gradient(onset_logits)) else: logits.append(onset_logits) if hparams.stop_activation_gradient: logits.append(tf.stop_gradient(activation_logits)) else: logits.append(activation_logits) if hparams.offset_network: if hparams.stop_offset_gradient: logits.append(tf.stop_gradient(offset_logits)) else: logits.append(offset_logits) combined_logits = tf.concat(logits, 2) if hparams.combined_lstm_units > 0: if hparams.use_tflite_compatible: lstm_layer_builder = lstm_layer_static_for_tflite else: lstm_layer_builder = lstm_layer outputs = lstm_layer_builder( tf.sigmoid(combined_logits), hparams.combined_lstm_units, hparams.bidirectional, is_training=is_training, lengths=length if hparams.use_lengths else None, stack_size=hparams.combined_rnn_stack_size, dropout_keep_prob=hparams.combined_rnn_dropout_keep_prob) else: outputs = combined_logits frame_logits = slim.fully_connected(outputs, constants.MIDI_PITCHES, activation_fn=None, scope='frame_logits') return frame_logits, onset_logits, offset_logits, velocity_values
def MLPdemo():
    # Data preprocessing
    # sample = "../Script/Mapping/Mfe/Sample.csv"
    sample = "../Script/Mapping/Mfe/SamTr.csv"
    potus = list(csv.reader(open(sample)))
    dx = []
    dy = []
    potus = potus[1:]
    # shuffle(potus)
    for i in range(0, len(potus)):
        dx.append([int(x) for x in potus[i][0:len(potus[i]) - 1]])
        dy.append([int(potus[i][len(potus[i]) - 1])])
    # train_dx = dx[0:864]; test_dx = dx[1152:]
    train_dx = dx[0:864]
    test_dx = dx[864:]
    train_dy = dy[0:864]
    test_dy = dy[864:]

    # Define the inputs and outputs
    x = tf.placeholder(tf.float32, shape=(None, 203), name="x-input")
    y_ = tf.placeholder(tf.float32, shape=(None, 1), name="y-input")

    # Define the parameters of the neural network
    w1 = tf.Variable(tf.random_normal([203, 10], mean=0, stddev=1, seed=1))
    w2 = tf.Variable(tf.random_normal([10, 1], mean=0, stddev=1, seed=1))
    b1 = tf.Variable(tf.random_normal([10], mean=0, stddev=1, seed=1))
    b2 = tf.Variable(tf.random_normal([1], mean=0, stddev=1, seed=1))

    y1 = tf.matmul(x, w1) + b1
    y11 = tf.nn.relu(y1)
    y2 = tf.matmul(y11, w2) + b2
    y = tf.sigmoid(y2)

    # tf.clip_by_value(t, clip_value_min, clip_value_max, name=None)
    # cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
    # loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
    loss = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)) +
                           (1 - y_) * tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)))
    train_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    X = train_dx
    Y = train_dy

    # Create a session and run the TensorFlow program
    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()
        sess.run(init)
        steps = 1500
        for i in range(steps):
            # Train the network on the selected samples and update the parameters
            sess.run(train_step, feed_dict={x: X, y_: Y})
            # Log once every 100 iterations
            if i % 100 == 0:
                # Compute the cross entropy over all test data
                total_cross_entropy, prob = sess.run([loss, y],
                                                     feed_dict={
                                                         x: test_dx,
                                                         y_: test_dy
                                                     })
                # Print the total cross entropy
                print(
                    "After %d training step(s), cross entropy on all data is %g"
                    % (i, total_cross_entropy))

        prob_train = sess.run(y, feed_dict={x: train_dx, y_: train_dy})
        # print(str(w1.eval(session=sess)))
        # print(str(w2.eval(session=sess)))
        # print(b1.eval(session=sess))
        # print(b2.eval(session=sess))
        from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, recall_score, precision_score
        roc_test = roc_auc_score(test_dy, prob)
        roc_train = roc_auc_score(train_dy, prob_train)
        prob_sig = []
        for i in prob:
            prob_sig.append(1 if float(i) > 0.5 else 0)
        print(accuracy_score(test_dy, prob_sig))
        # save_path = saver.save(sess, '../ML/model.ckpt')
        # print("Model saved in file: %s" % save_path)
        result = []
        result.append([
            roc_test,
            str(w1.eval(session=sess)),
            str(w2.eval(session=sess)),
            str(b1.eval(session=sess)),
            str(b2.eval(session=sess))
        ])

        import matplotlib.pyplot as plt
        from sklearn.metrics import roc_curve
        import pandas as pd

        print("auc :", roc_test, "-", roc_train)
        y_scores = prob_sig
        fpr, tpr, thresholds = roc_curve(test_dy, prob, pos_label=1.0)
        plt.figure(figsize=(6.4, 6.4))
        plt.plot(fpr, tpr, color='blue', label='AUC = %0.4f' % roc_test)
        plt.plot([0, 1], [0, 1], color='red', linestyle='--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic of MLP')
        plt.legend(loc="lower right")
        plt.show()
def conv_capsule_mat(input_tensor, input_activation, input_dim, output_dim, layer_name, num_routing=3, num_in_atoms=3, num_out_atoms=3, stride=2, kernel_size=5, min_var=0.0005, final_beta=1.0): """Convolutional Capsule layer with Pose Matrices.""" print('caps conv stride: {}'.format(stride)) in_atom_sq = num_in_atoms * num_in_atoms with tf.variable_scope(layer_name): input_shape = tf.shape(input_tensor) _, _, _, in_height, in_width = input_tensor.get_shape() # This Variable will hold the state of the weights for the layer kernel = utils.weight_variable(shape=[ input_dim, kernel_size, kernel_size, num_in_atoms, output_dim * num_out_atoms ], stddev=0.3) # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3]) activation_biases = utils.bias_variable( [1, 1, output_dim, 1, 1, 1, 1, 1], init_value=0.5, name='activation_biases') sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1], init_value=.5, name='sigma_biases') with tf.name_scope('conv'): print('convi;') # input_tensor: [x,128,8, c1,c2] -> [x*128,8, c1,c2] print(input_tensor.get_shape()) input_tensor_reshaped = tf.reshape(input_tensor, [ input_shape[0] * input_dim * in_atom_sq, input_shape[3], input_shape[4], 1 ]) input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3], input_tensor.get_shape()[4], 1)) input_act_reshaped = tf.reshape(input_activation, [ input_shape[0] * input_dim, input_shape[3], input_shape[4], 1 ]) input_act_reshaped.set_shape((None, input_tensor.get_shape()[3], input_tensor.get_shape()[4], 1)) print(input_tensor_reshaped.get_shape()) # conv: [x*128,out*out_at, c3,c4] conv_patches = tf.extract_image_patches( images=input_tensor_reshaped, ksizes=[1, kernel_size, kernel_size, 1], strides=[1, stride, stride, 1], rates=[1, 1, 1, 1], padding='VALID', ) act_patches = tf.extract_image_patches( images=input_act_reshaped, ksizes=[1, kernel_size, kernel_size, 1], strides=[1, stride, stride, 1], rates=[1, 1, 1, 1], padding='VALID', ) o_height = (in_height - kernel_size) // stride + 1 o_width = (in_width - kernel_size) // stride + 1 patches = tf.reshape(conv_patches, (input_shape[0], input_dim, in_atom_sq, o_height, o_width, kernel_size, kernel_size)) patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width, kernel_size, kernel_size)) patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2]) patch_split = tf.reshape( patch_trans, (input_dim, kernel_size, kernel_size, input_shape[0] * o_height * o_width * num_in_atoms, num_in_atoms)) patch_split.set_shape( (input_dim, kernel_size, kernel_size, None, num_in_atoms)) a_patches = tf.reshape(act_patches, (input_shape[0], input_dim, 1, 1, o_height, o_width, kernel_size, kernel_size)) a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width, kernel_size, kernel_size)) with tf.name_scope('input_act'): utils.activation_summary( tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches, axis=1), axis=-1), axis=-1)) with tf.name_scope('Wx'): wx = tf.matmul(patch_split, kernel) wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size, input_shape[0], o_height, o_width, num_in_atoms * num_out_atoms, output_dim)) wx.set_shape( (input_dim, kernel_size, kernel_size, None, o_height, o_width, num_in_atoms * num_out_atoms, output_dim)) wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2]) utils.activation_summary(wx) with tf.name_scope('routing'): # Routing # logits: [x, 128, 10, c3, c4] logit_shape = [ input_dim, output_dim, 1, o_height, o_width, kernel_size, kernel_size ] activation, center = update_conv_routing( wx=wx, input_activation=a_patches, 
activation_biases=activation_biases, sigma_biases=sigma_biases, logit_shape=logit_shape, num_out_atoms=num_out_atoms * num_out_atoms, input_dim=input_dim, num_routing=num_routing, output_dim=output_dim, min_var=min_var, final_beta=final_beta, ) # activations: [x, 10, 8, c3, c4] out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7]) out_center = tf.squeeze(center, axis=[1, 6, 7]) with tf.name_scope('center'): utils.activation_summary(out_center) return tf.sigmoid(out_activation), out_center
def __init__(self, inputs, config_reader=None):
    self.x = inputs[0]
    # calc sigmoid for each value in matrix.
    self.y = tf.sigmoid(self.x)
def pixcnn_gated_nonlinearity(a, b):
    return tf.sigmoid(a) * tf.tanh(b)
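# Hedged usage sketch (helper name and shapes assumed, not from the source):
# in a PixelCNN block the gate and signal halves typically come from a single
# convolution whose channel dimension is split in two.
def apply_gated_nonlinearity(conv_out):
    # conv_out: [batch, height, width, 2 * channels]; first half gates the second.
    a, b = tf.split(conv_out, 2, axis=-1)
    return pixcnn_gated_nonlinearity(a, b)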
# Training data set
x_data = np.array([[10, 0], [8, 1], [3, 3], [2, 3], [5, 1], [2, 0], [1, 0]])
y_data = np.array([[1], [1], [1], [1], [0], [0], [0]])

# Placeholders
X = tf.placeholder(shape=[None, 2], dtype=tf.float32)
Y = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Weight (2 rows x 1 column) & bias (1)
W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
# logits = X @ W + b
logits = tf.matmul(X, W) + b
H = tf.sigmoid(logits)

# Cost function
# cost = tf.reduce_mean(tf.square(H - Y))
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))

# Train
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Session & variable initialization
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Training
for step in range(1, 3001):
    _, cost_val = sess.run([train, cost], feed_dict={X: x_data, Y: y_data})
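# Hedged follow-up sketch (assumption, not in the original snippet): threshold
# the sigmoid output at 0.5 and report the training accuracy once the 3000
# gradient steps finish.
predicted = tf.cast(H > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
print('accuracy:', sess.run(accuracy, feed_dict={X: x_data, Y: y_data}))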
def __init__(self): super(CVAE, self).__init__() #TODO: add config parser #self.initizler = tf.keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=None) #self.training_datadir='/media/jehill/DATA/ML_data/fastmri/singlecoil/train/singlecoil_train/' self.training_datadir = '/jmain01/home/JAD029/txl04/jxp48-txl04/data/fastmri_singlecoil/singlecoil_train/' self.BATCH_SIZE = 16 self.num_epochs = 150 self.learning_rate = 1e-3 self.model_name = "CVAE" self.image_dim = 128 self.channels = 1 self.latent_dim = 64 self.kernel_size = 3 lrelu = lambda x: tf.keras.activations.relu(x, alpha=0.3) self.activation = lrelu self.input_image_1 = tf.placeholder( tf.float32, shape=[None, 256, 256, self.channels]) #for time being resize images self.input_image = tf.image.resize_images( self.input_image_1, [np.int(self.image_dim), np.int(self.image_dim)]) self.image_shape = self.input_image.shape[1:] self.learning_rate = tf.placeholder(tf.float32, [], name='learning_rate') self.encoder = self.inference_net() self.decoder = self.generative_net() # note these are keras model mean, logvar = tf.split(self.encoder(self.input_image), num_or_size_splits=2, axis=1) self.z = self.reparameterize(mean, logvar) logits = self.decoder(self.z) self.reconstructed = tf.sigmoid(logits) # calculate the KL loss var = tf.exp(logvar) kl_loss = 0.5 * tf.reduce_sum(tf.square(mean) + var - 1. - logvar) # cal mse loss sse_loss = 0.5 * tf.reduce_sum(tf.square(self.input_image - logits)) self.total_loss = tf.reduce_mean(kl_loss + sse_loss) / self.BATCH_SIZE self.list_gradients = self.encoder.trainable_variables + self.decoder.trainable_variables self.Optimizer = tf.train.AdamOptimizer( learning_rate=self.learning_rate, beta1=0.5).minimize(self.total_loss, var_list=self.list_gradients) # summary and writer for tensorboard visulization tf.summary.image("Reconstructed image", self.reconstructed) tf.summary.image("Input image", self.input_image) tf.summary.scalar("KL", kl_loss) tf.summary.scalar("SSE", sse_loss) tf.summary.scalar("Total loss", self.total_loss) self.merged_summary = tf.summary.merge_all() self.init = tf.global_variables_initializer() self.saver = tf.train.Saver() self.logdir = './trained_models/' + self.model_name # if not exist create logdir self.image_dir = self.logdir + '/images/' self.model_dir = self.logdir + '/final_model' self.gpu_list = ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3'] #self.gpu_list = ['/gpu:0'] print("Completed creating the model") logging.debug("Completed creating the model") if (os.path.exists(self.image_dir)): shutil.rmtree(self.image_dir, ignore_errors=True) os.makedirs(self.image_dir) else: os.makedirs(self.image_dir)
def affine_coupling(name, x, x_mask, inverse, split_dim, identity_first, init, decoder_self_attention_bias=None, **kwargs): """Affine coupling transform layer. Args: name: variable scope. x: 3-D Tensor, shape=[B, L, C]. x_mask : 2-D Tensor, shape=[B, L]. inverse: Forward or inverse pass. split_dim: which dimension to split (time, channel_continuous, channel_alternate). identity_first: True means the first half remains constant. False for 2nd. init: init. decoder_self_attention_bias: bias. **kwargs: additional arguments. Contains hparams, encoder_output and encoder_decoder_attention_bias. Returns: z: data transformed by the affine coupling layer. shape=[B, L, C] logabsdets: Log absolute determinant Jacobian. shape=[B] """ hparams = kwargs["hparams"] batch_size, length, n_channels = common_layers.shape_list(x) assert hparams.scale_width > 0.0 and hparams.scale_width < 1.0 with tf.variable_scope(name, reuse=tf.AUTO_REUSE): x_id, x_tr, _, n_transform, bias, mask = gops.split_coupling( x, x_mask, split_dim, identity_first, decoder_self_attention_bias) z_id = x_id transform_params = gops.transformer_decoder_block( "theta_tr", n_layers=hparams.n_layers_transform_params, x=x_id, x_mask=mask, output_size=n_transform * 2, init=init, decoder_self_attention_bias=bias, **kwargs) loc, unconstrained_scale = tf.split(transform_params, 2, axis=-1) scale = tf.sigmoid(unconstrained_scale + 2.0) if not inverse: z_tr = (x_tr + loc) * scale else: z_tr = x_tr / scale - loc logabsdet = gops.reduce_sum_over_lc(tf.log(scale), mask) # [B] if inverse: logabsdet *= -1 tf.summary.histogram("_loc", tf.boolean_mask(loc, mask)) tf.summary.histogram("_scale", tf.boolean_mask(scale, mask)) result = gops.join_coupling(z_id, z_tr, split_dim, identity_first) result = tf.reshape(result, [batch_size, length, n_channels]) return result, logabsdet
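# Hedged numerical check (standalone sketch, not from the source): the
# coupling's forward map z = (x + loc) * scale and its inverse
# x = z / scale - loc undo each other exactly, so chaining them should
# recover the input while the log-determinant terms cancel.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x_tr = tf.constant([[0.5, -1.0]])
loc = tf.constant([[0.1, 0.2]])
scale = tf.sigmoid(tf.constant([[1.0, 2.0]]) + 2.0)  # same parameterization as above
z_tr = (x_tr + loc) * scale      # forward pass
x_back = z_tr / scale - loc      # inverse pass
with tf.Session() as sess:
    print(sess.run(x_back))       # ~[[0.5, -1.0]]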
import tensorflow.compat.v1 as tf
import numpy as np

tf.disable_v2_behavior()

x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# use layers
W1 = tf.Variable(tf.random_normal([2, 2]), name='weight')
b1 = tf.Variable(tf.random_normal([2]), name='bias')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

W2 = tf.Variable(tf.random_normal([2, 1]), name='weight')
b2 = tf.Variable(tf.random_normal([1]), name='bias')
hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        _, cost_val = sess.run([train, cost], feed_dict={X: x_data, Y: y_data})
        if step % 1000 == 0:
            print(step, cost_val)
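    # Hedged follow-up (assumption, not in the original snippet): with the
    # hidden layer, XOR is learnable, so accuracy should reach 1.0.
    h, p, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    print('\nHypothesis:', h, '\nPredicted:', p, '\nAccuracy:', a)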
def build(self, inputs): """Build the graph for this configuration. Args: inputs: A dict of inputs. For training, should contain 'wav'. Returns: A dict of outputs that includes the 'predictions', 'init_ops', the 'push_ops', and the 'quantized_input'. """ num_stages = 10 num_layers = 30 filter_length = 3 width = 512 skip_width = 256 num_z = 16 # Encode the source with 8-bit Mu-Law. x = inputs['wav'] batch_size = self.batch_size x_quantized = utils.mu_law(x) x_scaled = tf.cast(x_quantized, tf.float32) / 128.0 x_scaled = tf.expand_dims(x_scaled, 2) encoding = tf.placeholder(name='encoding', shape=[batch_size, num_z], dtype=tf.float32) en = tf.expand_dims(encoding, 1) init_ops, push_ops = [], [] ### # The WaveNet Decoder. ### l = x_scaled l, inits, pushs = utils.causal_linear(x=l, n_inputs=1, n_outputs=width, name='startconv', rate=1, batch_size=batch_size, filter_length=filter_length) for init in inits: init_ops.append(init) for push in pushs: push_ops.append(push) # Set up skip connections. s = utils.linear(l, width, skip_width, name='skip_start') # Residual blocks with skip connections. for i in range(num_layers): dilation = 2**(i % num_stages) # dilated masked cnn d, inits, pushs = utils.causal_linear(x=l, n_inputs=width, n_outputs=width * 2, name='dilatedconv_%d' % (i + 1), rate=dilation, batch_size=batch_size, filter_length=filter_length) for init in inits: init_ops.append(init) for push in pushs: push_ops.append(push) # local conditioning d += utils.linear(en, num_z, width * 2, name='cond_map_%d' % (i + 1)) # gated cnn assert d.get_shape().as_list()[2] % 2 == 0 m = d.get_shape().as_list()[2] // 2 d = tf.sigmoid(d[:, :, :m]) * tf.tanh(d[:, :, m:]) # residuals l += utils.linear(d, width, width, name='res_%d' % (i + 1)) # skips s += utils.linear(d, width, skip_width, name='skip_%d' % (i + 1)) s = tf.nn.relu(s) s = (utils.linear(s, skip_width, skip_width, name='out1') + utils.linear(en, num_z, skip_width, name='cond_map_out1')) s = tf.nn.relu(s) ### # Compute the logits and get the loss. ### logits = utils.linear(s, skip_width, 256, name='logits') logits = tf.reshape(logits, [-1, 256]) probs = tf.nn.softmax(logits, name='softmax') return { 'init_ops': init_ops, 'push_ops': push_ops, 'predictions': probs, 'encoding': encoding, 'quantized_input': x_quantized, }
def get_word(self, sample_y, sample_h_pre, alpha_past_pre, sample_annotation): emb = tf.cond( sample_y[0] < 0, lambda: tf.fill((1, self.word_dim), 0.0), lambda: tf.nn.embedding_lookup(self.embed_matrix, sample_y)) # ret = self.parser.one_time_step((h_pre, None, None, alpha_past_pre, annotation, None), (emb, None)) emb_y_z_r_vector = tf.tensordot(emb, self.parser.W_yz_yr, axes=1) + \ self.parser.b_yz_yr # [batch, 2 * dim_decoder] hidden_z_r_vector = tf.tensordot(sample_h_pre, self.parser.U_hz_hr, axes=1) # [batch, 2 * dim_decoder] pre_z_r_vector = tf.sigmoid(emb_y_z_r_vector + \ hidden_z_r_vector) # [batch, 2 * dim_decoder] r1 = pre_z_r_vector[:, :self.parser.hidden_dim] # [batch, dim_decoder] z1 = pre_z_r_vector[:, self.parser.hidden_dim:] # [batch, dim_decoder] emb_y_h_vector = tf.tensordot(emb, self.parser.W_yh, axes=1) + \ self.parser.b_yh # [batch, dim_decoder] hidden_r_h_vector = tf.tensordot(sample_h_pre, self.parser.U_rh, axes=1) # [batch, dim_decoder] hidden_r_h_vector *= r1 pre_h_proposal = tf.tanh(hidden_r_h_vector + emb_y_h_vector) pre_h = z1 * sample_h_pre + (1. - z1) * pre_h_proposal context, _, alpha_past = self.parser.attender.get_context( sample_annotation, pre_h, alpha_past_pre, None) # [batch, dim_ctx] emb_y_z_r_nl_vector = tf.tensordot( pre_h, self.parser.U_hz_hr_nl, axes=1) + self.parser.b_hz_hr_nl context_z_r_vector = tf.tensordot(context, self.parser.W_c_z_r, axes=1) z_r_vector = tf.sigmoid(emb_y_z_r_nl_vector + context_z_r_vector) r2 = z_r_vector[:, :self.parser.hidden_dim] z2 = z_r_vector[:, self.parser.hidden_dim:] emb_y_h_nl_vector = tf.tensordot(pre_h, self.parser.U_rh_nl, axes=1) + self.parser.b_rh_nl emb_y_h_nl_vector *= r2 context_h_vector = tf.tensordot(context, self.parser.W_c_h_nl, axes=1) h_proposal = tf.tanh(emb_y_h_nl_vector + context_h_vector) h = z2 * pre_h + (1. - z2) * h_proposal h_t = h c_t = context alpha_past_t = alpha_past y_t_1 = emb logit_gru = tf.tensordot(h_t, self.Wh, axes=1) + self.bh logit_ctx = tf.tensordot(c_t, self.Wc, axes=1) + self.bc logit_pre = tf.tensordot(y_t_1, self.Wy, axes=1) + self.by logit = logit_pre + logit_ctx + logit_gru # batch x word_dim shape = tf.shape(logit) logit = tf.reshape(logit, [-1, shape[1] // 2, 2]) logit = tf.reduce_max(logit, axis=2) logit = tf.layers.dropout(inputs=logit, rate=0.2, training=self.training) logit = tf.tensordot(logit, self.Wo, axes=1) + self.bo next_probs = tf.nn.softmax(logits=logit) next_word = tf.reduce_max(tf.multinomial(next_probs, num_samples=1), axis=1) return next_probs, next_word, h_t, alpha_past_t
def __init__(self, config):
    # Load the configured parameters
    self.hiddens = hiddens = config.modelConfig.hidden_layers  # 200 hidden units
    self.num_skills = num_skills = config.num_skills
    self.input_size = input_size = config.input_size
    self.batch_size = batch_size = config.batch_size
    self.keep_prob_value = config.modelConfig.dropout_keep_prob

    # Define the placeholders fed to the model
    self.max_steps = tf.placeholder(tf.int32, name="max_steps")  # Maximum sequence length in the current batch
    # input_data: (32, None, 248); None is the sequence length, i.e. max_len/num_steps/the longest exercise sequence
    self.input_data = tf.placeholder(tf.float32, [batch_size, None, input_size],
                                     name="input_x")
    self.sequence_len = tf.placeholder(tf.int32, [batch_size], name="sequence_len")
    self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # dropout keep prob
    self.target_id = tf.placeholder(tf.int32, [batch_size, None], name="target_id")
    self.target_correctness = tf.placeholder(tf.float32, [batch_size, None],
                                             name="target_correctness")
    self.flat_target_correctness = None

    # Build the LSTM stack self.hidden_cell with hiddens (200) units per layer
    hidden_layers = []
    for idx, hidden_size in enumerate(hiddens):
        lstm_layer = tf.nn.rnn_cell.LSTMCell(num_units=hidden_size, state_is_tuple=True)
        hidden_layer = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_layer,
                                                     output_keep_prob=self.keep_prob)
        hidden_layers.append(hidden_layer)
    self.hidden_cell = tf.nn.rnn_cell.MultiRNNCell(cells=hidden_layers, state_is_tuple=True)

    # Use a dynamic RNN so the input sequence length can vary
    outputs, self.current_state = tf.nn.dynamic_rnn(cell=self.hidden_cell,
                                                    inputs=self.input_data,
                                                    sequence_length=self.sequence_len,
                                                    dtype=tf.float32)

    # Hidden-to-output weights: (units of the last hidden layer, number of skills (num_skills))
    output_w = tf.get_variable("W", [hiddens[-1], num_skills])
    output_b = tf.get_variable("b", [num_skills])

    # output: (batch_size * max_steps, units of the last hidden layer)
    self.output = tf.reshape(outputs, [batch_size * self.max_steps, hiddens[-1]])
    # Output-layer logits: (batch_size * max_steps, num_skills), i.e. each student's
    # estimated mastery of every skill after answering the step-th question
    self.logits = tf.matmul(self.output, output_w) + output_b
    # Reshape to (batch_size, max_steps, num_skills)
    self.mat_logits = tf.reshape(self.logits, [batch_size, self.max_steps, num_skills])
    # Apply a sigmoid to every output value at every time step of every sequence;
    # each value represents the mastery of one skill.
    self.pred_all = tf.sigmoid(self.mat_logits, name="pred_all")

    # self.target_correctness is the target result of the exercise sequence
    # (a sequence of 0s and 1s) provided by the user
    flat_target_correctness = tf.reshape(self.target_correctness, [-1])
    # flat_target_correctness is the flattened (1-D) view of target_correctness
    self.flat_target_correctness = flat_target_correctness

    flat_base_target_index = tf.range(batch_size * self.max_steps) * num_skills
    flat_base_target_id = tf.reshape(self.target_id, [-1])
    # Target index sequence flat_target_id: length batch_size * num_steps
    flat_target_id = flat_base_target_id + flat_base_target_index
    # flat_logits are the model predictions, with length batch_size * num_steps * num_skills
    flat_logits = tf.reshape(self.logits, [-1])
    # tf.gather picks the entries of the tensor at the given 1-D indices
    flat_target_logits = tf.gather(flat_logits, flat_target_id)

    # Apply a sigmoid to the gathered logits
    self.pred = tf.sigmoid(tf.reshape(flat_target_logits, [batch_size, self.max_steps]),
                           name="pred")
    # Threshold the sigmoid outputs into 0 or 1
    self.binary_pred = tf.cast(tf.greater_equal(self.pred, 0.5), tf.float32,
                               name="binary_pred")

    # Define the loss function
    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=flat_target_correctness,
                                                    logits=flat_target_logits))
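# Hedged worked example (not from the source) of the flattening arithmetic above:
# with batch_size * max_steps = 3 rows and num_skills = 4, the row offsets
# flat_base_target_index are [0, 4, 8], so target skills [2, 0, 3] map to flat
# indices [2, 4, 11] into the flattened (3 * 4)-element logits vector.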
def _gate(self, x, W, b): return tf.sigmoid(self._net(x, W, b))
def build_graph(self): self.params = self.init_params() self.x_input = tf.placeholder(tf.int64, [None, None]) self.mask_x = tf.placeholder(tf.float32, [None, None]) self.y_target = tf.placeholder(tf.int64, [None]) self.len_x = tf.placeholder(tf.int64, [None]) self.keep_prob = tf.placeholder(tf.float32, [None]) self.starting = tf.placeholder(tf.bool) """ attention gru & global gru Output: global_session_representation attentive_session_represention """ self.n_timesteps = tf.shape(self.x_input)[1] self.n_samples = tf.shape(self.x_input)[0] emb = tf.nn.embedding_lookup(self.params['Wemb'], self.x_input) emb = tf.nn.dropout(emb, keep_prob=self.keep_prob[0]) with tf.variable_scope('global_encoder'): cell_global = tf.compat.v1.nn.rnn_cell.GRUCell(self.hidden_units) init_state = cell_global.zero_state(self.n_samples, tf.float32) outputs_global, state_global = tf.nn.dynamic_rnn( cell_global, inputs=emb, sequence_length=self.len_x, initial_state=init_state, dtype=tf.float32) last_global = state_global # batch_size*hidden_units with tf.variable_scope('local_encoder'): cell_local = tf.compat.v1.nn.rnn_cell.GRUCell(self.hidden_units) init_statel = cell_local.zero_state(self.n_samples, tf.float32) outputs_local, state_local = tf.nn.dynamic_rnn( cell_local, inputs=emb, sequence_length=self.len_x, initial_state=init_statel, dtype=tf.float32) last_h = state_local # batch_size*hidden_units tmp_0 = tf.reshape(outputs_local, [-1, self.hidden_units]) tmp_1 = tf.reshape( tf.matmul(tmp_0, self.params['W_encoder']), [self.n_samples, self.n_timesteps, self.hidden_units]) tmp_2 = tf.expand_dims(tf.matmul(last_h, self.params['W_decoder']), 1) # batch_size*hidden_units tmp_3 = tf.reshape( tf.sigmoid(tmp_1 + tmp_2), [-1, self.hidden_units]) # batch_size,n_steps, hidden_units alpha = tf.matmul(tmp_3, tf.transpose(self.params['bl_vector'])) res = tf.reduce_sum(alpha, axis=1) sim_matrix = tf.reshape(res, [self.n_samples, self.n_timesteps]) att = tf.nn.softmax( sim_matrix * self.mask_x) * self.mask_x # batch_size*n_step p = tf.expand_dims(tf.reduce_sum(att, axis=1), 1) weight = att / p atttention_proj = tf.reduce_sum( (outputs_local * tf.expand_dims(weight, 2)), 1) self.global_session_representation = last_global self.attentive_session_represention = atttention_proj self.ome_cell = OME(mem_size=(self.memory_size, self.memory_dim), shift_range=self.shift_range, hidden_units=self.hidden_units) self.state = tf.placeholder(dtype=tf.float32, shape=[None, self.hidden_units]) self.memory_network_reads, self.memory_new_state = self.ome_cell( self.state, atttention_proj, self.starting) att_mean, att_var = tf.nn.moments(self.attentive_session_represention, axes=[1]) self.attentive_session_represention = ( self.attentive_session_represention - tf.expand_dims( att_mean, 1)) / tf.expand_dims(tf.sqrt(att_var + 1e-10), 1) glo_mean, glo_var = tf.nn.moments(self.global_session_representation, axes=[1]) self.global_session_representation = ( self.global_session_representation - tf.expand_dims( glo_mean, 1)) / tf.expand_dims(tf.sqrt(glo_var + 1e-10), 1) ntm_mean, ntm_var = tf.nn.moments(self.memory_network_reads, axes=[1]) self.memory_network_reads = ( self.memory_network_reads - tf.expand_dims( ntm_mean, 1)) / tf.expand_dims(tf.sqrt(ntm_var + 1e-10), 1) new_gate = tf.matmul(self.attentive_session_represention, self.params['inner_encoder']) + \ tf.matmul(self.memory_network_reads, self.params['outer_encoder']) + \ tf.matmul(self.global_session_representation, self.params['state_encoder']) new_gate = tf.nn.sigmoid(new_gate) 
self.narm_representation = tf.concat( (self.attentive_session_represention, self.global_session_representation), axis=1) self.memory_representation = tf.concat( (self.memory_network_reads, self.memory_network_reads), axis=1) final_representation = new_gate * self.narm_representation + ( 1 - new_gate) * self.memory_representation # prediction proj = tf.nn.dropout(final_representation, keep_prob=self.keep_prob[1]) ytem = tf.matmul(self.params['Wemb'], self.params['bili']) # [n_items, 200] hypothesis = tf.matmul( proj, tf.transpose(ytem)) + 1e-10 # [batch_size, n_step, n_items] self.hypo = tf.nn.softmax(hypothesis) self.loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=hypothesis, labels=self.y_target)) # optimize self.optimizer = tf.train.AdamOptimizer( learning_rate=self.lr).minimize(self.loss) self.saver = tf.train.Saver(max_to_keep=1)
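# Minimal numpy sketch (illustrative names and sizes, not the model's graph)
# of the gated fusion above: a sigmoid gate mixes the concatenated NARM
# representation (attention read + global GRU state) with the memory-network
# reads.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, d = 2, 4
att, glo, mem = (np.random.randn(batch, d) for _ in range(3))
W_inner, W_outer, W_state = (np.random.randn(d, 2 * d) for _ in range(3))

gate = sigmoid(att @ W_inner + mem @ W_outer + glo @ W_state)   # [batch, 2d]
narm = np.concatenate([att, glo], axis=1)                       # [batch, 2d]
memory = np.concatenate([mem, mem], axis=1)                     # [batch, 2d]
fused = gate * narm + (1.0 - gate) * memory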
def loss_layer(self, predict, labels): """ Define loss layer Parameters ---------- predict: TensorFlow Tensor The predicted values for the batch of data labels: TensorFlow Tensor Ground truth labels for the batch of data Returns ------- loss: TensorFlow Tensor Loss (combination of regression and classification losses) """ rescore = int(_utils.convert_shared_float_array_to_numpy(self.config.get('od_rescore'))) lmb_coord_xy = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_coord_xy')) lmb_coord_wh = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_coord_wh')) lmb_obj = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_obj')) lmb_noobj = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_noobj')) lmb_class = _utils.convert_shared_float_array_to_numpy(self.config.get('lmb_class')) # Prediction values from model on the images ypred = _tf.reshape(predict, [-1] + list(self.grid_shape) + [self.num_anchors, 5 + self.num_classes]) raw_xy = ypred[..., 0:2] raw_wh = ypred[..., 2:4] raw_conf = ypred[..., 4] class_scores = ypred[..., 5:] tf_anchors = _tf.constant(self.anchors) # Ground Truth info derived from ymap/labels gt_xy = labels[..., 0:2] gt_wh = labels[..., 2:4] gt_raw_wh = _tf.math.log(gt_wh / tf_anchors + 1e-5) gt_conf = labels[..., 4] gt_conf0 = labels[..., 0:1, 4] gt_class = labels[..., 5:] # Calculations on predicted confidences xy = _tf.sigmoid(raw_xy) wh = _tf.exp(raw_wh) * tf_anchors wh_anchors = _tf.exp(raw_wh * 0.0) * tf_anchors lo = xy - wh / 2 hi = xy + wh / 2 gt_area = gt_wh[..., 0] * gt_wh[..., 1] gt_lo = gt_xy - gt_wh / 2 gt_hi = gt_xy + gt_wh / 2 c_inter = _tf.maximum(2 * _tf.minimum(wh_anchors / 2, gt_wh / 2), 0) c_area = wh_anchors[..., 0] * wh_anchors[..., 1] c_inter_area = c_inter[..., 0] * c_inter[..., 1] c_iou = c_inter_area / (c_area + gt_area - c_inter_area) inter = _tf.maximum(_tf.minimum(hi, gt_hi) - _tf.maximum(lo, gt_lo), 0) area = wh[..., 0] * wh[..., 1] inter_area = inter[..., 0] * inter[..., 1] iou = inter_area / (area + gt_area - inter_area) active_iou = c_iou max_iou = _tf.reduce_max(active_iou, 3, keepdims=True) resp_box = _tf.cast(_tf.equal(active_iou, max_iou), dtype=_tf.float32) count = _tf.reduce_sum(gt_conf0) kr_obj_ij = _tf.stop_gradient(resp_box * gt_conf) kr_noobj_ij = 1 - kr_obj_ij s = 1 / (self.batch_size * self.grid_shape[0] * self.grid_shape[1]) kr_obj_ij_plus1 = _tf.expand_dims(kr_obj_ij, -1) if rescore: obj_gt_conf = kr_obj_ij * _tf.stop_gradient(iou) else: obj_gt_conf = kr_obj_ij kr_box = kr_obj_ij_plus1 obj_w = (kr_obj_ij * lmb_obj + kr_noobj_ij * lmb_noobj) loss_xy = lmb_coord_xy * _tf.reduce_sum(kr_box * _tf.square(gt_xy - xy)) / (count + 0.01) loss_wh = _tf.losses.huber_loss (labels=gt_raw_wh, predictions=raw_wh, weights=lmb_coord_wh * kr_box, delta= 1.0) # Confidence loss loss_conf = s * _tf.reduce_sum( obj_w * _tf.nn.sigmoid_cross_entropy_with_logits(labels=obj_gt_conf, logits=raw_conf)) # TODO: tf.nn.softmax_cross_entropy_with_logits_v2 instead of tf.nn.softmax_cross_entropy_with_logits loss_cls = lmb_class * _tf.reduce_sum( kr_obj_ij * _tf.nn.softmax_cross_entropy_with_logits_v2(labels=gt_class, logits=class_scores)) / ( count + 0.01) losses = [loss_xy, loss_wh, loss_conf, loss_cls] loss = _tf.add_n(losses) return loss
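# Minimal numpy sketch (not tied to the anchors/grid above) of the IoU used to
# pick the responsible anchor: boxes are given as (center, size), converted to
# corners, intersected, and intersection-over-union is returned.
import numpy as np

def iou_center_size(xy_a, wh_a, xy_b, wh_b):
    lo_a, hi_a = xy_a - wh_a / 2, xy_a + wh_a / 2
    lo_b, hi_b = xy_b - wh_b / 2, xy_b + wh_b / 2
    inter = np.maximum(np.minimum(hi_a, hi_b) - np.maximum(lo_a, lo_b), 0.0)
    inter_area = inter[..., 0] * inter[..., 1]
    area_a = wh_a[..., 0] * wh_a[..., 1]
    area_b = wh_b[..., 0] * wh_b[..., 1]
    return inter_area / (area_a + area_b - inter_area)

print(iou_center_size(np.array([0.5, 0.5]), np.array([1.0, 1.0]),
                      np.array([1.0, 0.5]), np.array([1.0, 1.0])))  # ~0.333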
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Current mode: %s ***" % mode) tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) input_ids_1 = features["input_ids_1"] input_mask_1 = features["input_mask_1"] if train_mode == constants.TRAIN_MODE_FINETUNE: masked_lm_positions_1 = tf.zeros([1]) masked_lm_ids_1 = tf.zeros([1]) masked_lm_weights_1 = tf.zeros([1]) else: masked_lm_positions_1 = features["masked_lm_positions_1"] masked_lm_ids_1 = features["masked_lm_ids_1"] masked_lm_weights_1 = features["masked_lm_weights_1"] input_ids_2 = features["input_ids_2"] input_mask_2 = features["input_mask_2"] if train_mode == constants.TRAIN_MODE_FINETUNE: masked_lm_positions_2 = tf.zeros([1]) masked_lm_ids_2 = tf.zeros([1]) masked_lm_weights_2 = tf.zeros([1]) else: masked_lm_positions_2 = features["masked_lm_positions_2"] masked_lm_ids_2 = features["masked_lm_ids_2"] masked_lm_weights_2 = features["masked_lm_weights_2"] documents_match_labels = features["documents_match_labels"] # Since the document_match_labels might contain labels like 0/1/2, we need # to transfer these labels to binary labels like 0/1. documents_match_labels = tf.cast(documents_match_labels > 0, tf.float32) is_real_example = None if "is_real_example" in features: is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32) else: is_real_example = tf.ones( tf.shape(documents_match_labels), dtype=tf.float32) is_training = (mode == tf.estimator.ModeKeys.TRAIN) if (dual_encoder_config.encoder_config.model_name == constants.MODEL_NAME_SMITH_DUAL_ENCODER): # For the smith model, since the actual looped number of sentences per # document maybe smaller than max_doc_length_by_sentence, we need to # overwrite the lm weights with the actual lm weights returned by the # function. (masked_lm_loss_1, masked_lm_loss_2, masked_lm_example_loss_1, masked_lm_example_loss_2, masked_lm_weights_1, masked_lm_weights_2, masked_sent_lm_loss_1, masked_sent_lm_loss_2, masked_sent_per_example_loss_1, masked_sent_per_example_loss_2, masked_sent_weight_1, masked_sent_weight_2, seq_embed_1, seq_embed_2, input_sent_embed_1, input_sent_embed_2, output_sent_embed_1, output_sent_embed_2, siamese_loss, siamese_example_loss, siamese_logits) = build_smith_dual_encoder( dual_encoder_config, train_mode, is_training, input_ids_1, input_mask_1, masked_lm_positions_1, masked_lm_ids_1, masked_lm_weights_1, input_ids_2, input_mask_2, masked_lm_positions_2, masked_lm_ids_2, masked_lm_weights_2, use_one_hot_embeddings, documents_match_labels, debugging) else: raise ValueError( "Only smith_dual_encoder is supported: %s" % dual_encoder_config.encoder_config.model_name) # There are three different modes for training in the smith model. # 1. joint_train: a multi-task learning setting which combines the masked # word LM losses for doc1/doc2 and the siamese matching loss. If we add the # masked sentence LM task, we also add the masked sentence LM losses for # the two documents. # 2. pretrain: only contains the masked word LM losses for doc1/doc2. We # currently didn't include the NSP loss since NSP loss is not very useful # according to the XLNet/ RoBERTa/ ALBERT paper. If we add the masked # sentence LM task, we also add the masked sentence LM losses for the # two documents. # 3. finetune: fine tune the model with loaded pretrained checkpoint only # with the siamese matching loss. 
If we add the masked sentence LM task, # we also add the masked sentence LM losses for the two documents. if train_mode == constants.TRAIN_MODE_JOINT_TRAIN: total_loss = masked_lm_loss_1 + masked_lm_loss_2 + siamese_loss elif train_mode == constants.TRAIN_MODE_PRETRAIN: total_loss = masked_lm_loss_1 + masked_lm_loss_2 elif train_mode == constants.TRAIN_MODE_FINETUNE: total_loss = siamese_loss else: raise ValueError("Only joint_train, pretrain, finetune are supported.") # If we add the masked sentence LM task, we also add the masked sentence # LM losses for the two documents. if dual_encoder_config.encoder_config.use_masked_sentence_lm_loss: total_loss += (masked_sent_lm_loss_1 + masked_sent_lm_loss_2) tvars = tf.trainable_variables() initialized_variable_names = {} scaffold_fn = None init_checkpoint = dual_encoder_config.encoder_config.init_checkpoint # Load pretrained BERT checkpoints if there is a specified path. if init_checkpoint: tf.logging.info("**** Passed pretrained BERT checkpoint = %s ****", init_checkpoint) (assignment_map, initialized_variable_names ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(init_checkpoint, assignment_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: init_string = ", *INIT_RANDOMLY*" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) output_spec = None predicted_score = tf.sigmoid(siamese_logits) predicted_class = tf.round(predicted_score) if dual_encoder_config.encoder_config.model_name == constants.MODEL_NAME_SMITH_DUAL_ENCODER: _, prediction_dict = utils.get_export_outputs_prediction_dict_smith_de( seq_embed_1, seq_embed_2, predicted_score, predicted_class, documents_match_labels, input_sent_embed_1, input_sent_embed_2, output_sent_embed_1, output_sent_embed_2) else: raise ValueError("Unsupported model: %s" % dual_encoder_config.encoder_config.model_name) if mode == tf.estimator.ModeKeys.TRAIN: train_op = optimization.create_optimizer(total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu) output_spec = tf.estimator.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.EVAL: if (train_mode == constants.TRAIN_MODE_JOINT_TRAIN or train_mode == constants.TRAIN_MODE_PRETRAIN): eval_metrics = (metric_fns.metric_fn_pretrain, [ masked_lm_example_loss_1, masked_lm_weights_1, masked_sent_per_example_loss_1, masked_sent_weight_1, masked_lm_example_loss_2, masked_lm_weights_2, masked_sent_per_example_loss_2, masked_sent_weight_2, predicted_class, documents_match_labels, is_real_example ]) elif train_mode == constants.TRAIN_MODE_FINETUNE: eval_metrics = (metric_fns.metric_fn_finetune, [ predicted_class, documents_match_labels, siamese_example_loss, is_real_example ]) else: raise ValueError("Only joint_train, pretrain, finetune are supported.") output_spec = tf.estimator.tpu.TPUEstimatorSpec( mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn) elif mode == tf.estimator.ModeKeys.PREDICT: output_spec = tf.estimator.tpu.TPUEstimatorSpec( mode=mode, predictions=prediction_dict, scaffold_fn=scaffold_fn) else: raise ValueError("Only TRAIN, EVAL, PREDICT modes are supported: %s" % mode) return output_spec
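# Minimal numpy sketch (hypothetical values) of the label handling and
# prediction head in model_fn above: graded match labels (0/1/2) are
# binarized, the siamese logits are squashed with a sigmoid, and the predicted
# class is the rounded score.
import numpy as np

documents_match_labels = np.array([0, 1, 2, 0])
binary_labels = (documents_match_labels > 0).astype(np.float32)    # [0., 1., 1., 0.]

siamese_logits = np.array([-2.0, 0.3, 1.5, -0.1])
predicted_score = 1.0 / (1.0 + np.exp(-siamese_logits))
predicted_class = np.round(predicted_score)                         # [0., 1., 1., 0.]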
def _build_outputs(self, images, labels, mode): is_training = (mode == mode_keys.TRAIN) if 'anchor_boxes' in labels: anchor_boxes = labels['anchor_boxes'] else: anchor_boxes = anchor.Anchor( self._params.architecture.min_level, self._params.architecture.max_level, self._params.anchor.num_scales, self._params.anchor.aspect_ratios, self._params.anchor.anchor_size, images.get_shape().as_list()[1:3]).multilevel_boxes batch_size = tf.shape(images)[0] for level in anchor_boxes: anchor_boxes[level] = tf.tile( tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1]) backbone_features = self._backbone_fn(images, is_training=is_training) fpn_features = self._fpn_fn(backbone_features, is_training=is_training) cls_outputs, box_outputs = self._retinanet_head_fn( fpn_features, is_training=is_training) # Shapemask mask prediction. if is_training: boxes = labels['mask_boxes'] outer_boxes = labels['mask_outer_boxes'] classes = labels['mask_classes'] else: detection_results = self._generate_detections_fn( box_outputs, cls_outputs, anchor_boxes, labels['image_info'][:, 1:2, :]) boxes = detection_results['detection_boxes'] scores = detection_results['detection_scores'] classes = detection_results['detection_classes'] valid_detections = detection_results['num_detections'] # Use list as input to avoide segmentation fault on TPU. image_size = images.get_shape().as_list()[1:3] outer_boxes = box_utils.compute_outer_boxes( tf.reshape(boxes, [-1, 4]), image_size, scale=self._outer_box_scale) outer_boxes = tf.reshape(outer_boxes, tf.shape(boxes)) classes = tf.cast(classes, tf.int32) instance_features, prior_masks = self._shape_prior_head_fn( fpn_features, boxes, outer_boxes, classes, is_training) coarse_mask_logits = self._coarse_mask_fn(instance_features, prior_masks, classes, is_training) fine_mask_logits = self._fine_mask_fn(instance_features, coarse_mask_logits, classes, is_training) model_outputs = { 'cls_outputs': cls_outputs, 'box_outputs': box_outputs, 'fine_mask_logits': fine_mask_logits, 'coarse_mask_logits': coarse_mask_logits, 'prior_masks': prior_masks, 'fpn_features': fpn_features, } if not is_training: model_outputs.update({ 'num_detections': valid_detections, 'detection_boxes': boxes, 'detection_outer_boxes': outer_boxes, 'detection_masks': tf.sigmoid(fine_mask_logits), 'detection_classes': tf.cast(classes, dtype=tf.int32), 'detection_scores': scores, }) return model_outputs
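# Sketch of what the outer-box step above is assumed to do (the exact
# behaviour of box_utils.compute_outer_boxes may differ): each detection box
# is grown around its centre by `scale` and clipped to the image, giving a
# larger crop for mask prediction.
import numpy as np

def outer_boxes(boxes, image_size, scale=1.25):
    """boxes: [N, 4] as (ymin, xmin, ymax, xmax); image_size: (height, width)."""
    ymin, xmin, ymax, xmax = np.split(boxes, 4, axis=1)
    cy, cx = (ymin + ymax) / 2, (xmin + xmax) / 2
    h, w = (ymax - ymin) * scale, (xmax - xmin) * scale
    grown = np.concatenate([cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2], axis=1)
    bounds = np.array([image_size[0], image_size[1], image_size[0], image_size[1]], dtype=float)
    return np.clip(grown, 0.0, bounds)

print(outer_boxes(np.array([[10., 10., 50., 50.]]), image_size=(64, 64)))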
def update_conv_routing_fast(wx, input_activation, activation_biases, sigma_biases, logit_shape, num_out_atoms, input_dim, num_routing, output_dim, final_beta, min_var, stride, layer_name): """Fast Convolutional Routing with EM for Mixture of Gaussians. The main difference with conv_routing is replacing extract_image_patches with utils.kernel_tile which uses a special conv-deconv operation. Args: wx: [batch, indim, outdim, outatom, height, width, kernel, kernel] input_activation: [batch, indim, 1, 1, height, width, kernel, kernel] activation_biases: [1, 1, outdim, 1, height, width] sigma_biases: [1, 1, outdim, 1, height, width] logit_shape: [indim, outdim, 1, height, width, kernel, kernel] num_out_atoms: number of atoms in each capsule, e.g. 9 or 16. input_dim: number of input capsule types, e.g. 32. num_routing: number of routing iterations, e.g. 3. output_dim: number of output capsule types, e.g. 32. final_beta: the temperature for making routing factors sharper. min_var: minimum variance for each capsule to avoid NaNs. stride: the stride with which wx was calculated, e.g. 2 or 1. layer_name: the name of this layer, e.g. conv_capsule1. Returns: out_activation and out_center: final activation and capsule values. """ # prior = utils.bias_variable([1] + logit_shape, name='prior') tf.logging.info( 'update_conv_routing_fast: Wx=%s act=%s act_bias=%s sigma_bias=%s logit_shape=%s', wx, input_activation, activation_biases, sigma_biases, logit_shape) with tf.name_scope('update_conv_routing_fast'): # With known shapes, these could all be replaced with tf.zeros with tf.name_scope('start_posterior'): start_posterior = tf.nn.softmax(tf.fill( tf.stack([ tf.shape(input_activation)[0], logit_shape[0], logit_shape[1], logit_shape[2], logit_shape[3], logit_shape[4], logit_shape[5], logit_shape[6] ]), 0.0), dim=2) with tf.name_scope('start_center'): start_center = tf.fill( tf.stack([ tf.shape(input_activation)[0], 1, output_dim, num_out_atoms, logit_shape[3], logit_shape[4], 1, 1 ]), 0.0) b = tf.shape(input_activation)[0] c = output_dim h = logit_shape[3] k = logit_shape[5] s = stride ih = h + (h - 1) * (s - 1) + (k - 1) tile_filter = np.zeros(shape=[k, k, 1, k * k], dtype=np.float32) for i in range(k): for j in range(k): tile_filter[i, j, :, i * k + j] = 1.0 # Body of routing loop. def _body(i, posterior, center, wx, activation_biases, sigma_biases, input_activation, tile_filter): """Body of EM while loop.""" tf.logging.info(' Wx: %s', wx) beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32))) posterior = tf.Print(posterior, [ layer_name, i, h, ih, tf.reduce_min(posterior), tf.reduce_max(posterior) ], message='posterior') # route: [outdim, height?, width?, batch, indim] with tf.name_scope('vote_conf'): vote_conf = posterior * input_activation vote_conf = tf.maximum(vote_conf, 0.0) # masses: [batch, 1, outdim, 1, height, width, 1, 1] with tf.name_scope('masses'): masses = tf.reduce_sum(vote_conf, axis=[1, -1, -2], keepdims=True, name='masses_calculation') + 0.0000001 with tf.name_scope('preactivate_unrolled'): preactivate_unrolled = vote_conf * wx # center: [batch, 1, outdim, outatom, height, width] with tf.name_scope('center'): center = .9 * tf.reduce_sum( preactivate_unrolled, axis=[1, -1, -2], keepdims=True) / masses + .1 * center # Rematerialization to save GPU memory. 
(+22ms/-1.6GB) # @tf.contrib.layers.recompute_grad def compute_noise_and_variance(wx, center, vote_conf, masses): noise = tf.squared_difference(wx, center) variance = min_var + tf.reduce_sum( vote_conf * noise, axis=[1, -1, -2], keepdims=True, name='variance_calculation') / masses return noise, variance with tf.name_scope('compute_noise_and_variance'): noise, variance = compute_noise_and_variance( wx, center, vote_conf, masses) with tf.name_scope('win'): log_variance = tf.log(variance) p_i = -1 * tf.reduce_sum(log_variance, axis=3, keepdims=True) log_2pi = tf.log(2 * math.pi) sigma_b = tf.log(sigma_biases * sigma_biases + min_var) win = masses * (p_i - num_out_atoms * (sigma_b + log_2pi + 1.0)) with tf.name_scope('logit'): logit = beta * (win - activation_biases * 50 * num_out_atoms) with tf.name_scope('activation_update'): activation_update = tf.minimum( 0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit))) with tf.name_scope('sigma_update'): log_det_sigma = -1 * p_i sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0 with tf.name_scope('exp_update'): exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True) prior_update = tf.subtract(activation_update - sigma_update, exp_update, name='prior_update_sub') max_prior_update = tf.reduce_max(prior_update, axis=[2, 3, 4, 5, 6, 7], keepdims=True, name='max_prior_opdate') prior_normal = tf.add(prior_update, -1 * max_prior_update) prior_exp = tf.exp(prior_normal) prior_exp_out = tf.reduce_sum(prior_exp, axis=2, keepdims=True, name='prior_exp_out') prior_exp_reshape = tf.reshape(prior_exp_out, [-1, h, h, k * k], name='prior_exp_reshape') sum_prior = tf.nn.conv2d_transpose(prior_exp_reshape, tile_filter, output_shape=[b * c, ih, ih, 1], strides=[1, s, s, 1], padding='VALID') sum_prior = tf.maximum(1e-6, sum_prior) sum_prior_patch = utils.kernel_tile(sum_prior, k, s, 1, name='sum_prior_patch') with utils.maybe_jit_scope(), tf.name_scope('posterior'): sum_prior_reshape = tf.reshape( sum_prior_patch, [-1, input_dim, 1, 1, h, h, k, k]) posterior = prior_exp / sum_prior_reshape return (i + 1, posterior, logit, center, masses) posterior, center = start_posterior, start_center for j in range(num_routing): with tf.name_scope('iter{}'.format(j)): tf.logging.info('iteration %d %s', j, '=' * 80) jj = tf.constant(j, dtype=tf.int32) _, posterior, activation, center, mass = _body( jj, posterior, center, wx, activation_biases, sigma_biases, input_activation, tile_filter) post, out_activation, out_center, out_mass = posterior, activation, center, mass with tf.name_scope('out_activation'): utils.activation_summary(tf.sigmoid(out_activation)) with tf.name_scope('masses'): utils.activation_summary(tf.sigmoid(out_mass)) with tf.name_scope('posterior'): utils.activation_summary(post) return out_activation, out_center
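# Two small numpy checks (illustrative only) of pieces used in the routing
# body above: the annealed inverse temperature
# beta_i = final_beta * (1 - 0.95 ** (i + 1)), which sharpens routing over
# iterations, and the numerically stable log-sigmoid identity
# log(sigmoid(x)) = min(0, x) - log(1 + exp(-|x|)) used for activation_update.
import numpy as np

final_beta = 1.0
print([final_beta * (1 - 0.95 ** (i + 1)) for i in range(3)])   # increasing betas

x = np.array([-30.0, -1.0, 0.0, 1.0, 30.0])
stable = np.minimum(0.0, x) - np.log1p(np.exp(-np.abs(x)))
print(np.allclose(stable, np.log(1.0 / (1.0 + np.exp(-x)))))    # True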
def update_em_routing(wx, input_activation, activation_biases, sigma_biases, logit_shape, num_out_atoms, num_routing, output_dim, leaky, final_beta, min_var): """Fully connected routing with EM for Mixture of Gaussians.""" # Wx: [batch, indim, outdim, outatom, height, width] # logit_shape: [indim, outdim, 1, height, width] # input_activations: [batch, indim, 1, 1, 1, 1] # activation_biases: [1, 1, outdim, 1, height, width] # prior = utils.bias_variable([1] + logit_shape, name='prior') update = tf.fill( tf.stack([ tf.shape(input_activation)[0], logit_shape[0], logit_shape[1], logit_shape[2], logit_shape[3], logit_shape[4] ]), 0.0) out_activation = tf.fill( tf.stack([ tf.shape(input_activation)[0], 1, output_dim, 1, logit_shape[3], logit_shape[4] ]), 0.0) out_center = tf.fill( tf.stack([ tf.shape(input_activation)[0], 1, output_dim, num_out_atoms, logit_shape[3], logit_shape[4] ]), 0.0) def _body(i, update, activation, center): """Body of the EM while loop.""" del activation beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32))) # beta = final_beta # route: [outdim, height?, width?, batch, indim] if leaky: posterior = layers.leaky_routing(update, output_dim) else: posterior = tf.nn.softmax(update, dim=2) vote_conf = posterior * input_activation # masses: [batch, 1, outdim, 1, height, width] masses = tf.reduce_sum(vote_conf, axis=1, keep_dims=True) + 0.00001 preactivate_unrolled = vote_conf * wx # center: [batch, 1, outdim, outatom, height, width] center = .9 * tf.reduce_sum(preactivate_unrolled, axis=1, keep_dims=True) / masses + .1 * center noise = (wx - center) * (wx - center) variance = min_var + tf.reduce_sum( vote_conf * noise, axis=1, keep_dims=True) / masses log_variance = tf.log(variance) p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True) log_2pi = tf.log(2 * math.pi) win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0)) logit = beta * (win - activation_biases * 5000) activation_update = tf.minimum( 0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit))) # return activation, center log_det_sigma = tf.reduce_sum(log_variance, axis=3, keep_dims=True) sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0 exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True) prior_update = activation_update - sigma_update - exp_update return (prior_update, logit, center) # activations = tf.TensorArray( # dtype=tf.float32, size=num_routing, clear_after_read=False) # centers = tf.TensorArray( # dtype=tf.float32, size=num_routing, clear_after_read=False) # updates = tf.TensorArray( # dtype=tf.float32, size=num_routing, clear_after_read=False) # updates.write(0, prior_update) for i in range(num_routing): update, out_activation, out_center = _body(i, update, out_activation, out_center) # for j in range(num_routing): # _, prior_update, out_activation, out_center = _body( # i, prior_update, start_activation, start_center) with tf.name_scope('out_activation'): utils.activation_summary(tf.sigmoid(out_activation)) with tf.name_scope('noise'): utils.variable_summaries((wx - out_center) * (wx - out_center)) with tf.name_scope('Wx'): utils.variable_summaries(wx) # for i in range(num_routing): # utils.activation_summary(activations.read(i)) # return activations.read(num_routing - 1), centers.read(num_routing - 1) return out_activation, out_center
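# Minimal numpy sketch of the EM routing loop above with the broadcasting
# dimensions stripped away: each output capsule is a Gaussian over the votes
# wx, the E-step recomputes the posterior assignment of input capsules, and
# the M-step recomputes means (centers) and variances weighted by the
# posterior and the input activations. The activation formula here is a
# simplified stand-in for the cost-based activation used in the code above;
# all shapes and constants are illustrative.
import numpy as np

def em_routing(wx, act_in, num_routing=3, min_var=5e-4, beta=1.0):
    """wx: [in_dim, out_dim, atoms] votes; act_in: [in_dim, 1, 1] activations."""
    in_dim, out_dim, atoms = wx.shape
    logit = np.zeros((in_dim, out_dim, 1))          # unnormalized routing logits
    center = np.zeros((1, out_dim, atoms))
    for _ in range(num_routing):
        # E-step: posterior assignment of each input capsule over output capsules.
        post = np.exp(logit - logit.max(axis=1, keepdims=True))
        post /= post.sum(axis=1, keepdims=True)
        conf = post * act_in                                     # weight by input activation
        mass = conf.sum(axis=0, keepdims=True) + 1e-5            # [1, out, 1]
        # M-step: per-output-capsule Gaussian mean and variance of the votes.
        center = (conf * wx).sum(axis=0, keepdims=True) / mass
        var = min_var + (conf * (wx - center) ** 2).sum(axis=0, keepdims=True) / mass
        act_out = 1.0 / (1.0 + np.exp(-beta * (mass - 1.0)))     # activation grows with mass
        # New logits: log activation plus Gaussian log-likelihood of each vote.
        log_prob = -0.5 * (np.log(2 * np.pi * var) + (wx - center) ** 2 / var).sum(
            axis=2, keepdims=True)                               # [in, out, 1]
        logit = np.log(act_out) + log_prob
    return act_out.squeeze(), center.squeeze(0)

activation, centers = em_routing(np.random.randn(8, 4, 3), np.random.rand(8, 1, 1))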