def Lock(self):
    """Used to lock in the personalization."""
    lock_ops = [tf.no_op()]  # Does nothing; a placeholder that marks the control boundary.
    if self.lowrank_adaptation:
        # Compute the new W from the low-rank factors.
        left_adapt = tf.squeeze(self.left_adapt)
        right_adapt = tf.squeeze(self.right_adapt)
        final_w = tf.add(tf.matmul(left_adapt, right_adapt), self.W)
        # tf.identity can be used to dereference the _ref type.
        self.lockedW = final_w
        lock_ops.append(tf.identity(self.lockedW))
    else:
        # Take the identity of the assign's output so the assignment is
        # guaranteed to have run before the value is read.
        lock = tf.assign(self.lockedW, self.W)
        lock_ops.append(tf.identity(lock))
    if self.mikolov_adapt:
        final_bias = tf.squeeze(self.bias + self.delta)
        lock_ops.append(self.lockedBias.assign(final_bias))
    else:
        lock_ops.append(self.lockedBias.assign(self.bias))
    # tf.group(*lock_ops) would return a single op with no outputs;
    # tf.tuple instead returns a list of tensors.
    self.lock_op = tf.tuple(lock_ops, name="lock_op")
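# A minimal, self-contained sketch (an illustration, not part of the original
# model) of the distinction noted in the comments above: tf.group returns a
# single Operation with no outputs, while tf.tuple returns a list of tensors
# whose values only become available once every input has been computed.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

v = tf.Variable(0.0)
assign = tf.assign(v, 1.0)

group_op = tf.group(assign)      # an Operation: sess.run(...) returns None
tuple_out = tf.tuple([assign])   # a list of tensors: values come back

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(group_op))    # None
    print(sess.run(tuple_out))   # [1.0]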
def compute_accuracy(x, l, mask):
    """Compute model accuracy."""
    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=l.dtype)
    _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

    if FLAGS.surrogate_attack:
        preds = sur_ch_model.get_probs(x)
        preds = tf.squeeze(preds)
        preds = tf.argmax(preds, -1, output_type=l.dtype)
        acc_update_op = tf.tuple((acc_update_op,
                                  tf.metrics.accuracy(l, preds, weights=mask)[1]))

    sess.run(tf.initialize_local_variables())
    for i in range(FLAGS.eval_steps):
        tf.logging.info(
            "\tEvaluating batch [%d / %d]" % (i + 1, FLAGS.eval_steps))
        acc = sess.run(acc_update_op)
    if FLAGS.surrogate_attack:
        tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
        tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc
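# A minimal sketch (illustration only; `labels` and `preds` here are
# hypothetical placeholders, not the model above) of the streaming behaviour
# compute_accuracy relies on: tf.metrics.accuracy returns (value, update_op),
# the update_op folds the current batch into running counts and returns the
# cumulative accuracy, and the metric's local variables must be initialized
# before the first run.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

labels = tf.placeholder(tf.int64, [None])
preds = tf.placeholder(tf.int64, [None])
_, update_op = tf.metrics.accuracy(labels, preds)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    print(sess.run(update_op, {labels: [1, 0], preds: [1, 1]}))  # 0.5
    print(sess.run(update_op, {labels: [1, 1], preds: [1, 1]}))  # 0.75 (cumulative)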
def test_make_is_span_maskable_features(self, seq_len, max_annotation_length,
                                        annotation_labels):
    np.random.seed(31415)
    annotation_labels = np.array(annotation_labels).astype(np.int32)
    batch_size, num_annotations = annotation_labels.shape
    annotation_begins = np.random.randint(
        seq_len, size=[batch_size, num_annotations], dtype=np.int32)
    annotation_length = np.random.randint(
        max_annotation_length, size=[batch_size, num_annotations],
        dtype=np.int32)
    annotation_ends = np.minimum(annotation_begins + annotation_length,
                                 seq_len - 1)

    is_annotation_mask_np = np.zeros((batch_size, seq_len), dtype=np.int32)
    is_annotation_cont_mask_np = np.zeros((batch_size, seq_len), dtype=np.int32)
    for i in range(batch_size):
        for j in range(seq_len):
            for k in range(num_annotations):
                if (annotation_labels[i, k] != 0 and
                        annotation_begins[i, k] <= j <= annotation_ends[i, k]):
                    is_annotation_mask_np[i, j] = 1
    for i in range(batch_size):
        for j in range(seq_len):
            for k in range(num_annotations):
                if (annotation_labels[i, k] != 0 and
                        annotation_begins[i, k] + 1 <= j <= annotation_ends[i, k]):
                    is_annotation_cont_mask_np[i, j] = 1

    is_annotation_mask_np = is_annotation_mask_np.reshape(-1)
    is_annotation_cont_mask_np = is_annotation_cont_mask_np.reshape(-1)

    def to_tensor(np_array):
        return tf.convert_to_tensor(np_array.reshape(-1), dtype=tf.int32)

    is_annotation_mask_tf_obj, is_annotation_cont_mask_tf_obj = (
        input_utils.make_is_span_maskable_features(
            batch_size,
            seq_len,
            num_annotations,
            to_tensor(annotation_begins),
            to_tensor(annotation_ends),
            to_tensor(annotation_labels),
        ))
    is_annotation_mask_tf, is_annotation_cont_mask_tf = self.evaluate(
        tf.tuple((is_annotation_mask_tf_obj, is_annotation_cont_mask_tf_obj)))

    self.assertAllEqual(is_annotation_mask_np, is_annotation_mask_tf)
    self.assertAllEqual(is_annotation_cont_mask_np, is_annotation_cont_mask_tf)
def birnn(cell, inputs, sequence_length, initial_state_fw=None,
          initial_state_bw=None, ff_keep_prob=1., recur_keep_prob=1.,
          enforce_dropout=False, dtype=tf.float32, scope=None):
    """Runs `cell` over `inputs` in both directions and concatenates outputs."""
    # Forward direction
    with tf.variable_scope(scope or 'BiRNN_FW') as fw_scope:
        output_fw, output_state_fw = rnn(cell, inputs, sequence_length,
                                         initial_state_fw, ff_keep_prob,
                                         recur_keep_prob, enforce_dropout,
                                         dtype, scope=fw_scope)

    # Backward direction: reverse the valid part of each sequence, run the
    # RNN, then reverse the outputs back to the original time order.
    rev_inputs = tf.reverse_sequence(inputs, sequence_length, 1, 0)
    with tf.variable_scope(scope or 'BiRNN_BW') as bw_scope:
        output_bw, output_state_bw = rnn(cell, rev_inputs, sequence_length,
                                         initial_state_bw, ff_keep_prob,
                                         recur_keep_prob, enforce_dropout,
                                         dtype, scope=bw_scope)
    output_bw = tf.reverse_sequence(output_bw, sequence_length, 1, 0)

    # Concatenate the forward/backward outputs along the feature axis.
    outputs = tf.concat([output_fw, output_bw], 2)
    return outputs, tf.tuple([output_state_fw, output_state_bw])
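# A small standalone check (illustration only) of the tf.reverse_sequence
# round-trip that birnn uses for the backward pass: only the first
# `lengths[b]` timesteps of each sequence are reversed, padding stays put,
# so reversing twice restores the original input exactly.
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.constant(np.arange(12, dtype=np.float32).reshape(2, 3, 2))  # [batch, time, dim]
lengths = tf.constant([3, 2])
rev = tf.reverse_sequence(x, lengths, seq_axis=1, batch_axis=0)
restored = tf.reverse_sequence(rev, lengths, seq_axis=1, batch_axis=0)

with tf.Session() as sess:
    x_np, restored_np = sess.run([x, restored])
    assert np.allclose(x_np, restored_np)  # double reversal is the identity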
def build(self, eta, loss, metrics):
    """Constructs the model's graph from the provided layers with the
    specified loss and metrics.

    Args:
        eta (float): A scalar representing the learning rate for stochastic
            gradient descent.
        loss (Layer): A layer used to construct the objective for stochastic
            gradient descent.
        metrics (list of Layers): A list of layers to use when evaluating
            model performance.
    """
    # This ensures that the variables we add end up in the graph unique to
    # this model.
    with self.graph.as_default():
        self.X = tf.placeholder(name='X', shape=(self.m, None),
                                dtype=tf.float32)
        self.Y = tf.placeholder(name='Y', shape=(self.n, None),
                                dtype=tf.float32)

        for layer in self.layers + [loss] + metrics:
            layer.build()

        self.forward = self.build_forward(self.X)
        self.loss_forward = loss.build_forward(self.forward, self.Y)
        self.metrics_forward = tf.tuple([
            metric.build_forward(self.forward, self.Y) for metric in metrics
        ])

        loss_backward = loss.build_backward()
        self.build_backward(loss_backward)
        self.build_sgd_step(eta)

        initializer = tf.global_variables_initializer()

        # This initializes the variables in our graph using the current
        # instance session.
        self.sess.run(initializer)
def __call__(self, dataset, moving_params=None):
    """"""
    vocabs = dataset.vocabs
    inputs = dataset.inputs
    targets = dataset.targets
    reuse = (moving_params is not None)
    self.tokens_to_keep3D = tf.expand_dims(
        tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT)), 2)
    self.sequence_lengths = tf.reshape(
        tf.reduce_sum(self.tokens_to_keep3D, [1, 2]), [-1, 1])
    self.n_tokens = tf.reduce_sum(self.sequence_lengths)
    self.moving_params = moving_params

    word_inputs, pret_inputs = vocabs[0].embedding_lookup(
        inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
    tag_inputs = vocabs[1].embedding_lookup(
        inputs[:, :, 2], moving_params=self.moving_params)

    top_recur = self.embed_concat(word_inputs + pret_inputs, tag_inputs)
    for i in xrange(self.n_recur):
        with tf.variable_scope('RNN%d' % i, reuse=reuse):
            top_recur, _ = self.RNN(top_recur)
    top_mlp = top_recur

    with tf.variable_scope('MLP0', reuse=reuse):
        parse_mlp, rel_mlp = self.double_MLP(top_mlp, n_splits=2)

    with tf.variable_scope('Parses', reuse=reuse):
        parse_logits = tf.squeeze(self.linear_classifier(parse_mlp, 1))
        parse_output = self.output(parse_logits, targets[:, :, 1])
        if moving_params is None:
            predictions = targets[:, :, 1]
        else:
            predictions = parse_output['predictions']
    with tf.variable_scope('Rels', reuse=reuse):
        rel_logits, rel_logits_cond = self.conditional_linear_classifier(
            rel_mlp, len(vocabs[2]), predictions)
        rel_output = self.output(rel_logits, targets[:, :, 2])
        rel_output['probabilities'] = self.conditional_probabilities(
            rel_logits_cond, transpose=False)

    output = {}
    output['probabilities'] = tf.tuple(
        [parse_output['probabilities'], rel_output['probabilities']])
    output['predictions'] = tf.stack(
        [parse_output['predictions'], rel_output['predictions']])
    output['correct'] = parse_output['correct'] * rel_output['correct']
    output['tokens'] = parse_output['tokens']
    output['n_correct'] = tf.reduce_sum(output['correct'])
    output['n_tokens'] = self.n_tokens
    output['accuracy'] = output['n_correct'] / output['n_tokens']
    output['loss'] = parse_output['loss'] + rel_output['loss']

    output['embed'] = tf.stack([word_inputs, tag_inputs])
    output['recur'] = top_recur
    output['parse_mlp'] = parse_mlp
    output['rel_mlp'] = rel_mlp
    output['parse_logits'] = parse_logits
    output['rel_logits'] = rel_logits
    return output
def test_masked_lm_metrics(self, block_ids):
    np.random.seed(31415)
    if isinstance(block_ids, list):
        batch_size = len(block_ids)
        block_ids_np = np.array(block_ids).astype(np.int32)
    else:
        batch_size = block_ids
        block_ids_np = np.random.randint(10, size=[batch_size], dtype=np.int32)

    multi_block_mask_np = np.zeros(batch_size, dtype=np.float32)
    for i in range(batch_size):
        if block_ids_np[i] == 0:
            continue
        for j in range(batch_size):
            if i != j and block_ids_np[i] == block_ids_np[j]:
                multi_block_mask_np[i] = 1
    single_block_mask_np = 1 - multi_block_mask_np

    mlm_loss_per_sample_np = np.random.random(batch_size).astype(np.float32)
    mlm_accuracy_per_sample_np = np.random.random(batch_size).astype(np.float32)
    mlm_weight_per_sample_np = np.random.random(batch_size).astype(np.float32)

    block_ids_tf = tf.compat.v1.placeholder_with_default(
        block_ids_np, shape=[None])
    mlm_loss_per_sample_tf = tf.compat.v1.placeholder_with_default(
        mlm_loss_per_sample_np, shape=[None])
    mlm_accuracy_per_sample_tf = tf.compat.v1.placeholder_with_default(
        mlm_accuracy_per_sample_np, shape=[None])
    mlm_weight_per_sample_tf = tf.compat.v1.placeholder_with_default(
        mlm_weight_per_sample_np, shape=[None])

    metric_dict = metric_utils.masked_lm_metrics(
        mlm_loss_per_sample_tf,
        mlm_accuracy_per_sample_tf,
        mlm_weight_per_sample_tf,
        block_ids_tf,
        mlm_loss_per_entity_sample=None,
        mlm_accuracy_per_entity_sample=None,
        mlm_weight_per_entity_sample=None,
        mlm_loss_per_non_entity_sample=None,
        mlm_accuracy_per_non_entity_sample=None,
        mlm_weight_per_non_entity_sample=None,
        is_train=True,
        metrics_name="abracadabra")

    (masked_lm_loss, masked_lm_accuracy, masked_lm_loss_multi_blocks,
     masked_lm_loss_single_blocks, masked_lm_accuracy_multi_blocks,
     masked_lm_accuracy_single_blocks, pct_multi_blocks,
     pct_single_blocks) = self.evaluate(
         tf.tuple((metric_dict["abracadabra/mlm_loss"],
                   metric_dict["abracadabra/mlm_accuracy"],
                   metric_dict["abracadabra/mlm_loss_multi_blocks"],
                   metric_dict["abracadabra/mlm_loss_single_blocks"],
                   metric_dict["abracadabra/mlm_accuracy_multi_blocks"],
                   metric_dict["abracadabra/mlm_accuracy_single_blocks"],
                   metric_dict["abracadabra/pct_multi_blocks"],
                   metric_dict["abracadabra/pct_single_blocks"])))

    def weighted_avg(values, weights):
        return values.dot(weights) / (weights.sum() + 1e-5)

    self.assertNear(
        masked_lm_loss,
        weighted_avg(mlm_loss_per_sample_np, mlm_weight_per_sample_np), 1e-5)
    self.assertNear(
        masked_lm_accuracy,
        weighted_avg(mlm_accuracy_per_sample_np, mlm_weight_per_sample_np),
        1e-5)

    mlm_weight_per_multi_block = mlm_weight_per_sample_np * multi_block_mask_np
    mlm_weight_per_single_block = mlm_weight_per_sample_np * single_block_mask_np
    self.assertNear(
        masked_lm_loss_multi_blocks,
        weighted_avg(mlm_loss_per_sample_np, mlm_weight_per_multi_block), 1e-5)
    self.assertNear(
        masked_lm_loss_single_blocks,
        weighted_avg(mlm_loss_per_sample_np, mlm_weight_per_single_block), 1e-5)
    self.assertNear(
        masked_lm_accuracy_multi_blocks,
        weighted_avg(mlm_accuracy_per_sample_np, mlm_weight_per_multi_block),
        1e-5)
    self.assertNear(
        masked_lm_accuracy_single_blocks,
        weighted_avg(mlm_accuracy_per_sample_np, mlm_weight_per_single_block),
        1e-5)
    self.assertNear(pct_multi_blocks, multi_block_mask_np.mean(), 1e-5)
    self.assertNear(pct_single_blocks, single_block_mask_np.mean(), 1e-5)
def __call__(self, dataset, moving_params=None):
    """"""
    vocabs = dataset.vocabs
    inputs = dataset.inputs
    targets = dataset.targets
    reuse = (moving_params is not None)
    self.tokens_to_keep3D = tf.expand_dims(
        tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT)), 2)
    self.sequence_lengths = tf.reshape(
        tf.reduce_sum(self.tokens_to_keep3D, [1, 2]), [-1, 1])
    self.n_tokens = tf.reduce_sum(self.sequence_lengths)
    self.moving_params = moving_params

    word_inputs, pret_inputs = vocabs[0].embedding_lookup(
        inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
    if self.word_l2_reg > 0:
        unk_mask = tf.expand_dims(
            tf.to_float(tf.greater(inputs[:, :, 1], vocabs[0].UNK)), 2)
        word_loss = self.word_l2_reg * tf.nn.l2_loss(
            (word_inputs - pret_inputs) * unk_mask)
    top_recur = embed_inputs = self.embed_concat(word_inputs + pret_inputs)
    for i in xrange(self.n_recur):
        with tf.variable_scope('RNN%d' % i, reuse=reuse):
            top_recur, _ = self.RNN(top_recur)

    with tf.variable_scope('MLP', reuse=reuse):
        dep_mlp, head_mlp = self.MLP(
            top_recur, self.class_mlp_size + self.attn_mlp_size, n_splits=2)
        dep_arc_mlp = dep_mlp[:, :, :self.attn_mlp_size]
        dep_rel_mlp = dep_mlp[:, :, self.attn_mlp_size:]
        head_arc_mlp = head_mlp[:, :, :self.attn_mlp_size]
        head_rel_mlp = head_mlp[:, :, self.attn_mlp_size:]

    with tf.variable_scope('Arcs', reuse=reuse):
        arc_logits = self.bilinear_classifier(dep_arc_mlp, head_arc_mlp)
        arc_output = self.output(arc_logits, targets[:, :, 1])
        if moving_params is None:
            predictions = targets[:, :, 1]
        else:
            predictions = arc_output['predictions']
    with tf.variable_scope('Rels', reuse=reuse):
        rel_logits, rel_logits_cond = self.conditional_bilinear_classifier(
            dep_rel_mlp, head_rel_mlp, len(vocabs[2]), predictions)
        rel_output = self.output(rel_logits, targets[:, :, 2])
        rel_output['probabilities'] = self.conditional_probabilities(
            rel_logits_cond)

    output = {}
    output['probabilities'] = tf.tuple(
        [arc_output['probabilities'], rel_output['probabilities']])
    output['predictions'] = tf.stack(
        [arc_output['predictions'], rel_output['predictions']])
    output['correct'] = arc_output['correct'] * rel_output['correct']
    output['tokens'] = arc_output['tokens']
    output['n_correct'] = tf.reduce_sum(output['correct'])
    output['n_tokens'] = self.n_tokens
    output['accuracy'] = output['n_correct'] / output['n_tokens']
    output['loss'] = arc_output['loss'] + rel_output['loss']
    if self.word_l2_reg > 0:
        output['loss'] += word_loss

    output['embed'] = embed_inputs
    output['recur'] = top_recur
    output['dep_arc'] = dep_arc_mlp
    output['head_dep'] = head_arc_mlp
    output['dep_rel'] = dep_rel_mlp
    output['head_rel'] = head_rel_mlp
    output['arc_logits'] = arc_logits
    output['rel_logits'] = rel_logits
    return output
def test_language_model_test(self, num_positions, padding_token_id,
                             use_label_weights, use_entity_mask, seed):
    np.random.seed(seed)
    seq_length = 13
    batch_size = 7
    vocab_size = 11
    hidden_size = 3
    embedding_size = 5

    embedding_table_np = np.random.random(
        (vocab_size, embedding_size)).astype(np.float32)
    embedding_table = tf.compat.v1.placeholder_with_default(
        embedding_table_np, shape=[vocab_size, embedding_size])
    input_tensor_np = np.random.random(
        (batch_size, seq_length, hidden_size)).astype(np.float32)
    input_tensor = tf.compat.v1.placeholder_with_default(
        input_tensor_np, shape=[None, None, hidden_size])

    num_labels_ids = num_positions or seq_length
    label_ids_np = np.random.randint(
        vocab_size, size=[batch_size, num_labels_ids], dtype=np.int32)
    label_ids = tf.compat.v1.placeholder_with_default(
        label_ids_np, shape=[None, num_labels_ids])

    if num_positions:
        positions_np = np.random.randint(
            seq_length, size=[batch_size, num_positions], dtype=np.int32)
        positions = tf.compat.v1.placeholder_with_default(
            positions_np, shape=[None, num_positions])
    else:
        positions = None

    if padding_token_id is not None:
        pad_mask = (label_ids_np != padding_token_id).astype(np.float32)
    else:
        pad_mask = np.ones((batch_size, num_labels_ids))

    if use_label_weights:
        label_weights_np = np.random.random(
            (batch_size, num_labels_ids)).astype(np.float32)
        label_weights = tf.compat.v1.placeholder_with_default(
            label_weights_np, shape=[None, num_labels_ids])
    else:
        label_weights_np = np.ones((batch_size, num_labels_ids))
        label_weights = None
    label_weights_np *= pad_mask

    if use_entity_mask:
        entity_mask_np = np.random.binomial(
            1, 0.5, size=(batch_size, num_labels_ids))
        entity_mask = tf.compat.v1.placeholder_with_default(
            entity_mask_np.astype(np.float32), shape=[None, num_labels_ids])
        non_entity_mask = 1 - entity_mask
    else:
        entity_mask = None
        non_entity_mask = None

    loss_fn = losses.LanguageModelLoss(
        embedding_table, activation="relu", hidden_size=hidden_size)
    loss_obj = loss_fn(input_tensor, label_ids, positions, label_weights,
                       padding_token_id, entity_mask, non_entity_mask)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    self.evaluate(init_op)

    self.assertEqual(
        loss_fn.linear_fn.bias.name,
        "language_model_loss/cls/predictions/transform/dense/bias:0")
    self.assertEqual(
        loss_fn.linear_fn.kernel.name,
        "language_model_loss/cls/predictions/transform/dense/kernel:0")
    weight_np = self.evaluate(loss_fn.linear_fn.kernel)

    if num_positions:
        input_tensor_np_new = np.zeros(
            (batch_size, num_positions, hidden_size))
        for i in range(batch_size):
            for j in range(num_positions):
                input_tensor_np_new[i, j] = input_tensor_np[i,
                                                            positions_np[i, j]]
        input_tensor_np = input_tensor_np_new

    # Reference implementation in numpy: dense layer with relu, layer norm,
    # then logits against the transposed embedding table.
    x = np.dot(
        input_tensor_np.reshape(batch_size * num_labels_ids, hidden_size),
        weight_np)
    x = np.maximum(x, 0)
    x -= x.mean(axis=1, keepdims=True)
    var_x = (x**2).mean(axis=1, keepdims=True)
    x /= np.sqrt(var_x + 0.001)
    logits = np.dot(x, np.transpose(embedding_table_np))
    log_probs = np.log(scipy.special.softmax(logits, axis=1)).reshape(
        batch_size, num_labels_ids, vocab_size)

    loss_np = 0
    mlm_loss_per_sample_np = np.zeros(batch_size)
    mlm_accuracy_per_sample_np = np.zeros(batch_size)
    mlm_loss_per_entity_sample_np = np.zeros(batch_size)
    mlm_accuracy_per_entity_sample_np = np.zeros(batch_size)
    mlm_loss_per_non_entity_sample_np = np.zeros(batch_size)
    mlm_accuracy_per_non_entity_sample_np = np.zeros(batch_size)
    for i in range(batch_size):
        for j in range(num_labels_ids):
            current_loss = -log_probs[i, j, label_ids_np[i, j]]
            current_loss *= label_weights_np[i, j]
            current_accuracy = int(
                np.argmax(log_probs[i, j]) == label_ids_np[i, j])
            current_accuracy *= label_weights_np[i, j]
            loss_np += current_loss
            mlm_loss_per_sample_np[i] += current_loss
            mlm_accuracy_per_sample_np[i] += current_accuracy
            if use_entity_mask:
                if entity_mask_np[i, j] == 1:
                    mlm_loss_per_entity_sample_np[i] += current_loss
                    mlm_accuracy_per_entity_sample_np[i] += current_accuracy
                else:
                    mlm_loss_per_non_entity_sample_np[i] += current_loss
                    mlm_accuracy_per_non_entity_sample_np[i] += current_accuracy

    loss_np /= (label_weights_np.sum() + 1e-5)
    mlm_weight_per_sample_np = label_weights_np.sum(axis=1)
    mlm_loss_per_sample_np /= (mlm_weight_per_sample_np + 1e-5)
    mlm_accuracy_per_sample_np /= (mlm_weight_per_sample_np + 1e-5)
    if use_entity_mask:
        mlm_loss_per_entity_sample_np /= (
            (label_weights_np * entity_mask_np).sum(axis=1) + 1e-5)
        mlm_accuracy_per_entity_sample_np /= (
            (label_weights_np * entity_mask_np).sum(axis=1) + 1e-5)
        mlm_loss_per_non_entity_sample_np /= (
            (label_weights_np * (1 - entity_mask_np)).sum(axis=1) + 1e-5)
        mlm_accuracy_per_non_entity_sample_np /= (
            (label_weights_np * (1 - entity_mask_np)).sum(axis=1) + 1e-5)

    if use_entity_mask:
        (loss, mlm_loss_per_sample, mlm_accuracy_per_sample,
         mlm_weight_per_sample, mlm_loss_per_entity_sample,
         mlm_accuracy_per_entity_sample, mlm_weight_per_entity_sample,
         mlm_loss_per_non_entity_sample, mlm_accuracy_per_non_entity_sample,
         mlm_weight_per_non_entity_sample) = self.evaluate(
             tf.tuple((loss_obj.loss, loss_obj.mlm_loss_per_sample,
                       loss_obj.mlm_accuracy_per_sample,
                       loss_obj.mlm_weight_per_sample,
                       loss_obj.mlm_loss_per_entity_sample,
                       loss_obj.mlm_accuracy_per_entity_sample,
                       loss_obj.mlm_weight_per_entity_sample,
                       loss_obj.mlm_loss_per_non_entity_sample,
                       loss_obj.mlm_accuracy_per_non_entity_sample,
                       loss_obj.mlm_weight_per_non_entity_sample)))
    else:
        (loss, mlm_loss_per_sample, mlm_accuracy_per_sample,
         mlm_weight_per_sample) = self.evaluate(
             tf.tuple((loss_obj.loss, loss_obj.mlm_loss_per_sample,
                       loss_obj.mlm_accuracy_per_sample,
                       loss_obj.mlm_weight_per_sample)))

    self.assertAllEqual(loss.shape, [])
    self.assertNear(loss, loss_np, err=1e-4)
    self.assertAllEqual(mlm_loss_per_sample.shape, [batch_size])
    self.assertArrayNear(mlm_loss_per_sample, mlm_loss_per_sample_np, err=1e-4)
    self.assertAllEqual(mlm_accuracy_per_sample.shape, [batch_size])
    self.assertArrayNear(mlm_accuracy_per_sample, mlm_accuracy_per_sample_np,
                         err=1e-4)
    self.assertAllEqual(mlm_weight_per_sample.shape, [batch_size])
    self.assertArrayNear(mlm_weight_per_sample, mlm_weight_per_sample_np,
                         err=1e-4)

    if use_entity_mask:
        self.assertArrayNear(mlm_weight_per_entity_sample,
                             (label_weights_np * entity_mask_np).sum(axis=1),
                             err=1e-4)
        self.assertArrayNear(mlm_loss_per_entity_sample,
                             mlm_loss_per_entity_sample_np, err=1e-4)
        self.assertArrayNear(mlm_accuracy_per_entity_sample,
                             mlm_accuracy_per_entity_sample_np, err=1e-4)
        self.assertArrayNear(mlm_weight_per_non_entity_sample,
                             (label_weights_np *
                              (1 - entity_mask_np)).sum(axis=1),
                             err=1e-4)
        self.assertArrayNear(mlm_loss_per_non_entity_sample,
                             mlm_loss_per_non_entity_sample_np, err=1e-4)
        self.assertArrayNear(mlm_accuracy_per_non_entity_sample,
                             mlm_accuracy_per_non_entity_sample_np, err=1e-4)
def train(imPath, logPath, modelPath, pmPath, nTrain, nValid, nTest,
          restoreVariables, nSteps, gpuIndex, testPMIndex):
    os.environ['CUDA_VISIBLE_DEVICES'] = '%d' % gpuIndex

    outLogPath = logPath
    trainWriterPath = pathjoin(logPath, 'Train')
    validWriterPath = pathjoin(logPath, 'Valid')
    outModelPath = pathjoin(modelPath, 'model.ckpt')
    outPMPath = pmPath

    batchSize = UNet2D.hp['batchSize']
    imSize = UNet2D.hp['imSize']
    nChannels = UNet2D.hp['nChannels']
    nClasses = UNet2D.hp['nClasses']

    # --------------------------------------------------
    # data
    # --------------------------------------------------

    Train = np.zeros((nTrain, imSize, imSize, nChannels))
    Valid = np.zeros((nValid, imSize, imSize, nChannels))
    Test = np.zeros((nTest, imSize, imSize, nChannels))

    LTrain = np.zeros((nTrain, imSize, imSize, nClasses))
    LValid = np.zeros((nValid, imSize, imSize, nClasses))
    LTest = np.zeros((nTest, imSize, imSize, nClasses))

    print('loading data, computing mean / st dev')
    if not os.path.exists(modelPath):
        os.makedirs(modelPath)
    if restoreVariables:
        datasetMean = loadData(pathjoin(modelPath, 'datasetMean.data'))
        datasetStDev = loadData(pathjoin(modelPath, 'datasetStDev.data'))
    else:
        datasetMean = 0
        datasetStDev = 0
        for iSample in range(nTrain + nValid + nTest):
            I = im2double(tifread('%s/I%05d_Img.tif' % (imPath, iSample)))
            datasetMean += np.mean(I)
            datasetStDev += np.std(I)
        datasetMean /= (nTrain + nValid + nTest)
        datasetStDev /= (nTrain + nValid + nTest)

        saveData(datasetMean, pathjoin(modelPath, 'datasetMean.data'))
        saveData(datasetStDev, pathjoin(modelPath, 'datasetStDev.data'))

    perm = np.arange(nTrain + nValid + nTest)
    np.random.shuffle(perm)

    for iSample in range(0, nTrain):
        path = '%s/I%05d_Img.tif' % (imPath, perm[iSample])
        im = im2double(tifread(path))
        Train[iSample, :, :, 0] = (im - datasetMean) / datasetStDev
        path = '%s/I%05d_Ant.tif' % (imPath, perm[iSample])
        im = tifread(path)
        for i in range(nClasses):
            LTrain[iSample, :, :, i] = (im == i + 1)

    for iSample in range(0, nValid):
        path = '%s/I%05d_Img.tif' % (imPath, perm[nTrain + iSample])
        im = im2double(tifread(path))
        Valid[iSample, :, :, 0] = (im - datasetMean) / datasetStDev
        path = '%s/I%05d_Ant.tif' % (imPath, perm[nTrain + iSample])
        im = tifread(path)
        for i in range(nClasses):
            LValid[iSample, :, :, i] = (im == i + 1)

    for iSample in range(0, nTest):
        path = '%s/I%05d_Img.tif' % (imPath, perm[nTrain + nValid + iSample])
        im = im2double(tifread(path))
        Test[iSample, :, :, 0] = (im - datasetMean) / datasetStDev
        path = '%s/I%05d_Ant.tif' % (imPath, perm[nTrain + nValid + iSample])
        im = tifread(path)
        for i in range(nClasses):
            LTest[iSample, :, :, i] = (im == i + 1)

    # --------------------------------------------------
    # optimization
    # --------------------------------------------------

    tfLabels = tf.placeholder("float",
                              shape=[None, imSize, imSize, nClasses],
                              name='labels')

    globalStep = tf.Variable(0, trainable=False)
    learningRate0 = 0.01
    decaySteps = 1000
    decayRate = 0.95
    learningRate = tf.train.exponential_decay(learningRate0, globalStep,
                                              decaySteps, decayRate,
                                              staircase=True)

    with tf.name_scope('optim'):
        loss = tf.reduce_mean(
            -tf.reduce_sum(tf.multiply(tfLabels, tf.log(UNet2D.nn)), 3))
        updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # optimizer = tf.train.MomentumOptimizer(1e-3, 0.9)
        optimizer = tf.train.MomentumOptimizer(learningRate, 0.9)
        # optimizer = tf.train.GradientDescentOptimizer(learningRate)
        with tf.control_dependencies(updateOps):
            optOp = optimizer.minimize(loss, global_step=globalStep)

    with tf.name_scope('eval'):
        error = []
        for iClass in range(nClasses):
            labels0 = tf.reshape(
                tf.to_int32(
                    tf.slice(tfLabels, [0, 0, 0, iClass], [-1, -1, -1, 1])),
                [batchSize, imSize, imSize])
            predict0 = tf.reshape(
                tf.to_int32(tf.equal(tf.argmax(UNet2D.nn, 3), iClass)),
                [batchSize, imSize, imSize])
            correct = tf.multiply(labels0, predict0)
            nCorrect0 = tf.reduce_sum(correct)
            nLabels0 = tf.reduce_sum(labels0)
            error.append(1 - tf.to_float(nCorrect0) / tf.to_float(nLabels0))
        errors = tf.tuple(error)

    # --------------------------------------------------
    # inspection
    # --------------------------------------------------

    with tf.name_scope('scalars'):
        tf.summary.scalar('avg_cross_entropy', loss)
        for iClass in range(nClasses):
            tf.summary.scalar('avg_pixel_error_%d' % iClass, error[iClass])
        tf.summary.scalar('learning_rate', learningRate)
    with tf.name_scope('images'):
        split0 = tf.slice(UNet2D.nn, [0, 0, 0, 0], [-1, -1, -1, 1])
        split1 = tf.slice(UNet2D.nn, [0, 0, 0, 1], [-1, -1, -1, 1])
        if nClasses > 2:
            split2 = tf.slice(UNet2D.nn, [0, 0, 0, 2], [-1, -1, -1, 1])
        tf.summary.image('pm0', split0)
        tf.summary.image('pm1', split1)
        if nClasses > 2:
            tf.summary.image('pm2', split2)
    merged = tf.summary.merge_all()

    # --------------------------------------------------
    # session
    # --------------------------------------------------

    saver = tf.train.Saver()
    # The config parameter is needed to save variables when using the GPU.
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    if os.path.exists(outLogPath):
        shutil.rmtree(outLogPath)
    trainWriter = tf.summary.FileWriter(trainWriterPath, sess.graph)
    validWriter = tf.summary.FileWriter(validWriterPath, sess.graph)

    if restoreVariables:
        saver.restore(sess, outModelPath)
        print("Model restored.")
    else:
        sess.run(tf.global_variables_initializer())

    # --------------------------------------------------
    # train
    # --------------------------------------------------

    batchData = np.zeros((batchSize, imSize, imSize, nChannels))
    batchLabels = np.zeros((batchSize, imSize, imSize, nClasses))
    for i in range(nSteps):
        # train
        perm = np.arange(nTrain)
        np.random.shuffle(perm)
        for j in range(batchSize):
            batchData[j, :, :, :] = Train[perm[j], :, :, :]
            batchLabels[j, :, :, :] = LTrain[perm[j], :, :, :]
        summary, _ = sess.run(
            [merged, optOp],
            feed_dict={
                UNet2D.tfData: batchData,
                tfLabels: batchLabels,
                UNet2D.tfTraining: 1
            })
        trainWriter.add_summary(summary, i)

        # validation
        perm = np.arange(nValid)
        np.random.shuffle(perm)
        for j in range(batchSize):
            batchData[j, :, :, :] = Valid[perm[j], :, :, :]
            batchLabels[j, :, :, :] = LValid[perm[j], :, :, :]
        summary, es = sess.run(
            [merged, errors],
            feed_dict={
                UNet2D.tfData: batchData,
                tfLabels: batchLabels,
                UNet2D.tfTraining: 0
            })
        validWriter.add_summary(summary, i)
        e = np.mean(es)
        print('step %05d, e: %f' % (i, e))

        if i == 0:
            if restoreVariables:
                lowestError = e
            else:
                lowestError = np.inf

        if np.mod(i, 100) == 0 and e < lowestError:
            lowestError = e
            print("Model saved in file: %s" % saver.save(sess, outModelPath))

    # --------------------------------------------------
    # test
    # --------------------------------------------------

    if not os.path.exists(outPMPath):
        os.makedirs(outPMPath)

    for i in range(nTest):
        j = np.mod(i, batchSize)
        batchData[j, :, :, :] = Test[i, :, :, :]
        batchLabels[j, :, :, :] = LTest[i, :, :, :]
        if j == batchSize - 1 or i == nTest - 1:
            output = sess.run(UNet2D.nn,
                              feed_dict={
                                  UNet2D.tfData: batchData,
                                  tfLabels: batchLabels,
                                  UNet2D.tfTraining: 0
                              })
            for k in range(j + 1):
                pm = output[k, :, :, testPMIndex]
                gt = batchLabels[k, :, :, testPMIndex]
                im = np.sqrt(normalize(batchData[k, :, :, 0]))
                imwrite(
                    np.uint8(255 * np.concatenate(
                        (im, np.concatenate((pm, gt), axis=1)), axis=1)),
                    '%s/I%05d.png' % (outPMPath, i - j + k + 1))

    # --------------------------------------------------
    # save hyper-parameters, clean-up
    # --------------------------------------------------

    saveData(UNet2D.hp, pathjoin(modelPath, 'hp.data'))

    trainWriter.close()
    validWriter.close()
    sess.close()
def __call__(self, dataset, moving_params=None):
    """"""
    vocabs = dataset.vocabs
    inputs = dataset.inputs
    targets = dataset.targets
    reuse = (moving_params is not None)
    self.tokens_to_keep3D = tf.expand_dims(
        tf.to_float(tf.greater(inputs[:, :, 0], vocabs[0].ROOT)), 2)
    self.sequence_lengths = tf.reshape(
        tf.reduce_sum(self.tokens_to_keep3D, [1, 2]), [-1, 1])
    self.n_tokens = tf.reduce_sum(self.sequence_lengths)
    self.moving_params = moving_params

    word_inputs, pret_inputs = vocabs[0].embedding_lookup(
        inputs[:, :, 0], inputs[:, :, 1], moving_params=self.moving_params)
    tag_inputs = vocabs[1].embedding_lookup(
        inputs[:, :, 2], moving_params=self.moving_params)
    if self.add_to_pretrained and not self.char_based:
        word_inputs += pret_inputs
    if self.word_l2_reg > 0:
        unk_mask = tf.expand_dims(
            tf.to_float(tf.greater(inputs[:, :, 1], vocabs[0].UNK)), 2)
        word_loss = self.word_l2_reg * tf.nn.l2_loss(
            (word_inputs - pret_inputs) * unk_mask)
    embed_inputs = self.embed_concat(word_inputs, tag_inputs)

    top_recur = embed_inputs
    recur_diag_bilin = False  # self.recur_diag_bilin and tag_inputs.get_shape().as_list()[-1] == word_inputs.get_shape().as_list()[-1]
    for i in xrange(self.n_recur):
        with tf.variable_scope('RNN%d' % i, reuse=reuse):
            top_recur, _ = self.RNN(top_recur,
                                    recur_diag_bilin=recur_diag_bilin)
            recur_diag_bilin = self.recur_diag_bilin
    if self.attn_based:
        top_recur = self.soft_attn(top_recur,
                                   recur_diag_bilin=recur_diag_bilin)
        recur_diag_bilin = False

    with tf.variable_scope('Arcs', reuse=reuse):
        arc_logits = self.bilinear_classifier(top_recur, top_recur)
        arc_output = self.output(arc_logits, targets[:, :, 1])
        if moving_params is None:
            predictions = targets[:, :, 1]
        else:
            predictions = arc_output['predictions']
    with tf.variable_scope('Rels', reuse=reuse):
        rel_logits, rel_logits_cond = self.conditional_bilinear_classifier(
            top_recur, top_recur, len(vocabs[2]), predictions)
        rel_output = self.output(rel_logits, targets[:, :, 2])
        rel_output['probabilities'] = self.conditional_probabilities(
            rel_logits_cond)

    output = {}
    output['probabilities'] = tf.tuple(
        [arc_output['probabilities'], rel_output['probabilities']])
    output['predictions'] = tf.stack(
        [arc_output['predictions'], rel_output['predictions']])
    output['correct'] = arc_output['correct'] * rel_output['correct']
    output['tokens'] = arc_output['tokens']
    output['n_correct'] = tf.reduce_sum(output['correct'])
    output['n_tokens'] = self.n_tokens
    output['accuracy'] = output['n_correct'] / output['n_tokens']
    output['loss'] = arc_output['loss'] + rel_output['loss']
    if self.word_l2_reg > 0:
        output['loss'] += word_loss

    output['embed'] = embed_inputs
    output['recur'] = top_recur
    output['arc_logits'] = arc_logits
    output['rel_logits'] = rel_logits
    return output