def main(_): """Print out the FLAGS in the main function.""" logging.info("param = %s", FLAGS.param) if FLAGS.action == "echo": logging.warning(FLAGS.echo_text) elif FLAGS.action == "echo_bool": logging.info("Just do it? %s", "Yes!" if FLAGS.just_do_it else "No :(")
def test_integration_log(self):  # pylint: disable=no-self-use
    """This test simply should not return any error."""
    logging.info("This is an info test message.")
    logging.warning("This is a warning test message.")
    logging.error("This is an error message.")
    logging.log(logging.INFO, "This is just another info test message.")
    logging.debug("This is a debug test message.")
def read_data(source_path, buckets, max_size=None):
    """Read token-id data from the source file and put it into buckets.

    Args:
        source_path: path to the file with token-ids for the source language.
        buckets: a list of (source_size, target_size) pairs used to group
            sequences by length.
        max_size: maximum number of lines to read; all others will be ignored.
            If 0 or None, the data file will be read completely (no limit).

    Returns:
        data_set: a list of length len(buckets); data_set[n] contains a list of
            (source, target) pairs read from the provided data file that fit
            into the n-th bucket, i.e., such that len(source) < buckets[n][0]
            and len(target) < buckets[n][1]; source and target are lists of
            token-ids.
    """
    data_set = [[] for _ in buckets]
    with tf.gfile.GFile(source_path, mode="r") as source_file:
        source = source_file.readline()
        counter = 0
        while source and (not max_size or counter < max_size):
            counter += 1
            if counter % 100000 == 0:
                logging.info("  reading data line %d" % counter)
                sys.stdout.flush()
            source_ids = [int(x) for x in source.split()]
            # Autoencoder setup: the target sequence is the source sequence.
            target_ids = [int(x) for x in source.split()]
            target_ids.append(EOS_ID)
            for bucket_id, (source_size, target_size) in enumerate(buckets):
                if len(source_ids) < source_size and len(target_ids) < target_size:
                    data_set[bucket_id].append([source_ids, target_ids])
                    break
            source = source_file.readline()
    return data_set
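# Hedged sketch (not part of the original module) of the bucket-assignment rule
# used above: a pair goes into the first bucket large enough to hold both the
# source and the target (which includes the trailing EOS token). The bucket
# shapes and sequence below are made up for illustration only.
def _demo_bucket_assignment():
    """Illustrative only: show which bucket a 12-token sequence falls into."""
    example_buckets = [(10, 15), (20, 25), (40, 50)]
    source_ids = list(range(12))             # 12 source token-ids
    target_ids = source_ids + [EOS_ID]       # autoencoder target plus EOS
    for bucket_id, (source_size, target_size) in enumerate(example_buckets):
        if len(source_ids) < source_size and len(target_ids) < target_size:
            return bucket_id, example_buckets[bucket_id]
    # A sequence longer than every bucket is silently dropped by read_data.
    return None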
def load_model_from_files(  # pylint: disable=too-many-arguments
        cls,
        model_file,
        checkpoint_dir,
        forward_only,
        restore_all_vars=True,
        pretrain_model_path="",
        hparams_dict=None,
        sess=None):
    """Load the model definition and weights from files."""
    hparams = build_base_hparams()
    hparams_dict = hparams_dict or {}
    if os.path.exists(model_file):
        logging.info("Loading seq2seq model definition from %s..." % model_file)
        with open(model_file, "r") as fobj:
            model_dict = json.load(fobj)
        model_dict["buckets"] = [
            tuple(_bucket) for _bucket in model_dict["buckets"]
        ]
        hparams.set_from_map(model_dict)
    else:
        logging.info("Initializing a fresh training...")
        hparams.set_from_map(hparams_dict)
    model = cls(hparams, forward_only)

    # Load model weights.
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    sess = sess or tf.get_default_session()
    if pretrain_model_path:
        if tf.gfile.IsDirectory(pretrain_model_path):
            pretrain_model_path = os.path.join(pretrain_model_path, "weights")
            pretrain_ckpt = tf.train.get_checkpoint_state(pretrain_model_path)
            pretrain_model_path = pretrain_ckpt.model_checkpoint_path
        logging.info("Loading pretrained model weights from checkpoint: %s" %
                     pretrain_model_path)
        if restore_all_vars:
            model.saver_sup.restore(sess, pretrain_model_path)
        else:
            # This is an ugly workaround to load a pretrained model for only
            # part of the variables.
            sess.run(tf.global_variables_initializer())
            model.saver_unsup.restore(sess, pretrain_model_path)
    elif ckpt:
        logging.info("Loading model weights from checkpoint_dir: %s" %
                     checkpoint_dir)
        if restore_all_vars:
            model.saver_sup.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())
            model.saver_unsup.restore(sess, ckpt.model_checkpoint_path)
    else:
        logging.info("Initialize fresh parameters...")
        sess.run(tf.global_variables_initializer())
    return model
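# Hedged usage sketch (not from the original file): how the loader above might
# be called for inference. It assumes load_model_from_files is exposed as a
# classmethod of Seq3SeqModel (the class the train script below uses) and that
# the model.json / weights layout under model_dir exists; both are assumptions.
def _example_restore_for_decoding(model_dir):
    """Illustrative only: restore a saved model for forward-only decoding."""
    model_file = os.path.join(model_dir, "model.json")    # hypothetical layout
    checkpoint_dir = os.path.join(model_dir, "weights")   # hypothetical layout
    with tf.Session() as sess:
        return Seq3SeqModel.load_model_from_files(
            model_file, checkpoint_dir, forward_only=True, sess=sess)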
def train(hparams, data_hparams):
    """Training loop for the discovery model."""
    vocab = Vocabulary.get_default_vocab(not data_hparams.skip_at_symbol)
    # Create global step variable first.
    train_data, val_data, test_data = make_train_data(
        json.loads(FLAGS.dataset_spec), vocab, data_hparams, FLAGS.epochs)
    model = DiscoveryModel(data_hparams, hparams, vocab)
    train_outputs, _, _ = model.build_train_graph(train_data)
    seq_loss_op, train_op = model.build_train_loss(train_data, train_outputs)
    with tf.control_dependencies([val_data.initializer, test_data.initializer]):
        _, val_ctr_smile_op, val_sampled_smiles_op = model.build_val_net(
            val_data.get_next())
        model.build_test_net(val_ctr_smile_op, val_sampled_smiles_op,
                             test_data.get_next())
    train_summary_ops = tf.summary.merge(tf.get_collection("train_summaries"))
    val_summary_ops = tf.summary.merge(tf.get_collection("val_summaries"))
    test_summary_ops = tf.summary.merge(tf.get_collection("test_summaries"))
    stale_global_step_op = tf.train.get_or_create_global_step()
    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=FLAGS.train_dir or None,
            save_checkpoint_steps=FLAGS.steps_per_checkpoint or None,
            log_step_count_steps=FLAGS.steps_per_checkpoint or None) as sess:
        if FLAGS.train_dir:
            summary_writer = tf.summary.FileWriterCache.get(FLAGS.train_dir)
        else:
            summary_writer = None
        while not sess.should_stop():
            stale_global_step, seq_loss, _, train_summary = sess.run([
                stale_global_step_op, seq_loss_op, train_op, train_summary_ops
            ])
            if summary_writer is not None:
                summary_writer.add_summary(train_summary, stale_global_step)
            # Run validation and test at every checkpoint step.
            if stale_global_step % FLAGS.steps_per_checkpoint == 0:
                try:
                    sess.run([val_data.initializer, test_data.initializer])
                    # The monitored training session will pick up the summaries
                    # and automatically add them.
                    _, _ = sess.run([val_summary_ops, test_summary_ops])
                # OutOfRangeError must be caught before the generic handler,
                # since it is a subclass of Exception.
                except tf.errors.OutOfRangeError:
                    logging.info("Test finished. Continue training.")
                    continue
                except Exception as ex:  # pylint: disable=broad-except
                    logging.error(str(ex))
                    raise
    logging.info("Coordinator request to stop.")
def main(_): """Entry function for the script.""" if FLAGS.action == "sample": raise NotImplementedError elif FLAGS.action == "fp": result = [] for _ in range(FLAGS.repeat_num): tf.reset_default_graph() result.append(fp_decode()) em_acc, acc = zip(*result) logging.info("EM Acc: %s" % ", ".join(["%.8f" % x for x in em_acc])) logging.info("Acc: %s" % ", ".join(["%.8f" % x for x in acc])) else: print("Unsupported action: %s" % FLAGS.action)
def read_data_labels(source_path, label_path, reg_flag, num_prop, buckets,
                     max_size=None):  # pylint: disable=too-many-locals
    """Read token-id data and labels from files and put them into buckets.

    Args:
        source_path: path to the file with token-ids for the source language.
        label_path: path to the file with labels, one example per line.
        reg_flag: if True, labels are parsed as floats (regression);
            otherwise as ints (classification).
        num_prop: number of labeled properties per example.
        buckets: a list of (source_size, target_size) pairs used to group
            sequences by length.
        max_size: maximum number of lines to read; all others will be ignored.
            If 0 or None, data files will be read completely (no limit).

    Returns:
        data_set: a list of length len(buckets); data_set[n] contains a list of
            (source, target, label) triples read from the provided data files
            that fit into the n-th bucket, i.e., such that
            len(source) < buckets[n][0] and len(target) < buckets[n][1];
            source and target are lists of token-ids.
    """
    data_set = [[] for _ in buckets]
    with tf.gfile.GFile(source_path) as source_file, \
            tf.gfile.GFile(label_path) as label_file:
        source = source_file.readline().strip()
        label = label_file.readline().strip()
        counter = 0
        while (source and label) and (not max_size or counter < max_size):
            counter += 1
            if counter % 100000 == 0:
                logging.info("  reading data line %d" % counter)
                sys.stdout.flush()
            source_ids = [int(x) for x in source.split()]
            # Autoencoder setup: the target sequence is the source sequence.
            target_ids = [int(x) for x in source.split()]
            if reg_flag:
                if num_prop > 1:
                    label_ids = [float(x) for x in label.split()]
                else:
                    label_ids = float(label)
            else:
                label_ids = int(label)
            target_ids.append(EOS_ID)
            for bucket_id, (source_size, target_size) in enumerate(buckets):
                if len(source_ids) < source_size and len(target_ids) < target_size:
                    data_set[bucket_id].append(
                        [source_ids, target_ids, label_ids])
                    break
            source = source_file.readline().strip()
            label = label_file.readline().strip()
    return data_set
def show_event_file(event_file):
    """Print the string summaries stored in an event file at FLAGS.step."""
    try:
        it = tf.train.summary_iterator(event_file)
    except Exception:  # pylint: disable=broad-except
        logging.error("Corrupted file: %s" % event_file)
        return
    for event in it:
        if event.step == FLAGS.step:
            for v in event.summary.value:
                if v.tensor and v.tensor.string_val:
                    if FLAGS.tag and FLAGS.tag != v.tag:
                        continue
                    if FLAGS.tag:
                        print("\n".join(v.tensor.string_val).replace(", ", ","))
                        break
                    logging.info(v.tag)
                    logging.info("\n".join(v.tensor.string_val))
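# Hedged usage sketch (not part of the original tool): scan every event file in
# a directory and print its stored string summaries via show_event_file above.
# The "events.out.tfevents.*" pattern is the standard TF writer naming; the
# helper name and directory argument are assumptions for illustration.
def _show_all_event_files(event_dir):
    """Illustrative only: apply show_event_file to each event file in a dir."""
    pattern = event_dir.rstrip("/") + "/events.out.tfevents.*"
    for event_file in sorted(tf.gfile.Glob(pattern)):
        logging.info("Scanning %s", event_file)
        show_event_file(event_file)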
def fp_decode():
    """Decode ALL samples from the given data file and output to file."""
    # TODO(zhengxu): An ugly workaround to ensure the output path is optional.
    output_path = FLAGS.output_path or NamedTemporaryFile(delete=False).name
    with tf.Session() as sess, open(output_path, "w") as fout:
        all_smiles = SMISingleTaskReader(
            dataset_cols=FLAGS.dataset_headers.split(","),
            cls_thres=FLAGS.cls_thres).read(FLAGS.data_path)
        fetcher = FingerprintFetcher(FLAGS.model_dir, FLAGS.vocab_path, sess)
        exact_match_num = 0
        acc_count = 0
        # Note here the idx is the row index in the dataset,
        # so it might not be robust to dataset shuffle.
        for idx, (smile, label) in all_smiles.iterrows():
            seq2seq_fp, output_smile, acc = fetcher.decode(smile, label)
            acc_count += acc["accuracy"]
            if output_smile == smile:
                exact_match_num += 1
            if FLAGS.output_path:
                fout.write(" ".join([str(fp_bit) for fp_bit in seq2seq_fp]) + "\n")
            if idx % 200 == 0 and idx:
                logging.info("Progress: %d/%d" % (idx, len(all_smiles)))
        final_em_acc = float(exact_match_num) / len(all_smiles)
        final_acc = float(acc_count) / len(all_smiles)
        logging.info("Exact match count: %d/%d, %.4f%%" %
                     (exact_match_num, len(all_smiles), 100. * final_em_acc))
        logging.info("Accuracy: %d/%d, %.4f%%" %
                     (acc_count, len(all_smiles), 100. * final_acc))
    return final_em_acc, final_acc
def save_model_to_files(  # pylint: disable=too-many-arguments
        self,
        model_file,
        checkpoint_file,
        save_all_vars,
        sess=None,
        verbose=False):
    """Save model hyper-parameters to a JSON file and weights to a checkpoint."""
    if verbose:
        logging.info("Save model definition to %s..." % model_file)
    model_dict = {
        key: getattr(self, key) for key in self.MODEL_PARAMETER_FIELDS
    }
    with open(model_file, "w") as fobj:
        json.dump(model_dict, fobj)
    checkpoint_dir = os.path.dirname(checkpoint_file)
    if os.path.exists(checkpoint_dir):
        if verbose:
            logging.info("Save weights to %s..." % checkpoint_file)
        sess = sess or tf.get_default_session()
        if save_all_vars:
            self.saver_sup.save(sess, checkpoint_file,
                                global_step=self.global_step)
        else:
            self.saver_unsup.save(sess, checkpoint_file,
                                  global_step=self.global_step)
    elif verbose:
        logging.info("Skip saving weights to %s since the dir does not exist." %
                     checkpoint_dir)
def train(  # pylint: disable=too-many-locals,too-many-statements,too-many-arguments
        train_data, test_data, train_labels, test_labels, restore_all_vars,
        save_all_vars):
    """Train script."""
    model_dir = FLAGS.model_dir
    batch_size = FLAGS.batch_size
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    hparams_override = json.loads(FLAGS.hparams) if FLAGS.hparams else dict()
    # Override some hparams fields.
    hparams_override["label_states"] = (bool(train_labels)
                                        and bool(test_labels))
    hparams_override["batch_size"] = hparams_override.get(
        "batch_size", batch_size)
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:%d" % FLAGS.gpu):
            # Create model.
            model = seq3seq_model.Seq3SeqModel.load_model_from_dir(
                model_dir,
                False,
                restore_all_vars,
                pretrain_model_path=FLAGS.pretrain_model_path,
                hparams_dict=hparams_override,
                sess=sess)
            if FLAGS.reset_lr > 0.:
                logging.info("Resetting LR to %.10f..." % FLAGS.reset_lr)
                sess.run(model.learning_rate_op.assign(FLAGS.reset_lr))
            if FLAGS.reset_global_step:
                logging.info("Reset global step to 0.")
                sess.run(model.global_step.assign(0))
            buckets = model.buckets
            reg = model.reg
            num_prop = model.num_prop
            alpha = model.alpha  # Coefficient for the combined loss function.
            label_states = model.hparams.label_states

            # Read data into buckets and compute their sizes.
            if model.hparams.label_states:
                logging.info("Reading train data from %s..." % train_data)
                train_label_set = read_data_labels(train_data, train_labels,
                                                   reg, num_prop, buckets)
                logging.info("Reading test data from %s..." % test_data)
                test_label_set = read_data_labels(test_data, test_labels, reg,
                                                  num_prop, buckets)
            else:
                logging.info("Reading train data from %s..." % train_data)
                train_label_set = read_data(train_data, buckets)
                logging.info("Reading test data from %s..." % test_data)
                test_label_set = read_data(test_data, buckets)
            train_bucket_sizes = [
                len(train_label_set[b]) for b in range(len(buckets))
            ]
            train_total_size = float(sum(train_bucket_sizes))
            train_bucket_prob = [
                size / train_total_size for size in train_bucket_sizes
            ]

            # This is the training loop.
            step_time, loss = 0.0, 0.0
            current_step = 0
            previous_losses = []
            if FLAGS.summary_dir:
                train_writer = tf.summary.FileWriter(
                    os.path.join(FLAGS.summary_dir, "train"), sess.graph)
                test_writer = tf.summary.FileWriter(
                    os.path.join(FLAGS.summary_dir, "test"), sess.graph)
            else:
                logging.warning(
                    "No summary directory is specified. Relying on the log"
                    " file alone may be unreliable.")
                train_writer = None
                test_writer = None
            test_summary_ops = model.test_summary_ops
            while model.learning_rate_op.eval() > FLAGS.min_lr_threshold:
                # Choose a bucket according to the data distribution: pick a
                # random bucket id with probability proportional to its size.
                bucket_id = np.random.choice(len(train_bucket_prob),
                                             p=train_bucket_prob)

                # Get a batch and make a step.
                start_time = time.time()
                encoder_inputs, decoder_inputs, labels, target_weights = \
                    model.get_batch(train_label_set, bucket_id, label_states)
                _, step_loss, step_loss_sup = model.step(  # pylint: disable=unused-variable
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, train_writer, False, False, labels)
                step_time += (time.time() -
                              start_time) / FLAGS.steps_per_checkpoint
                loss += ((1.0 - alpha) * step_loss +
                         alpha * step_loss_sup) / FLAGS.steps_per_checkpoint
                current_step += 1

                # Once in a while, we save a checkpoint, print statistics,
                # and run evals.
                if current_step % FLAGS.steps_per_checkpoint == 0:
                    # Print statistics for the previous epoch.
                    perplexity = math.exp(
                        float(loss)) if loss < 300 else float("inf")
                    logging.info(
                        "global step %d learning rate %.6f step-time %.6f "
                        "perplexity %.6f" %
                        (model.global_step.eval(),
                         model.learning_rate_op.eval(), step_time, perplexity))
                    logging.info("  loss_unsupervised: %s loss_supervised: %s" %
                                 (str(step_loss), str(step_loss_sup)))
                    # Decrease the learning rate if no improvement was seen
                    # over the last 3 checkpoints.
                    if len(previous_losses) > 2 and loss > max(
                            previous_losses[-3:]):
                        sess.run(model.learning_rate_decay_op)
                    previous_losses.append(loss)
                    # Save checkpoint and zero the timer and loss.
                    model.save_model_to_dir(model_dir, save_all_vars, sess=sess)
                    step_time, loss = 0.0, 0.0
                    # Run a full evaluation on the test dataset.
                    eval_dataset(test_label_set,
                                 model,
                                 label_states,
                                 num_prop,
                                 test_writer=test_writer,
                                 sess=sess)
                    sys.stdout.flush()
                if FLAGS.max_step and current_step >= FLAGS.max_step:
                    break
def eval_dataset(test_label_set, model, label_states, num_prop,
                 test_writer=None, sess=None):
    """Perform an evaluation on the test dataset."""
    sess = sess or tf.get_default_session()
    if num_prop == 1:
        acms = AccumulatorWithBuckets()
    else:
        acms = [AccumulatorWithBuckets() for _ in range(num_prop)]
    for bucket_id in range(len(test_label_set)):
        length_test_set = len(test_label_set[bucket_id])
        if length_test_set == 0:
            logging.info("  eval: empty bucket %d" % bucket_id)
            continue
        batch_size = model.batch_size
        # Iterate over all the data inside the bucket.
        for start_idx in range(0, length_test_set, batch_size):
            # TODO(zhengxu): Provide an option to eval a subset of each bucket
            # for speed.
            tmp_data = [None] * len(test_label_set)
            actual_data_len = (min(length_test_set, start_idx + batch_size) -
                               start_idx)
            tmp_data[bucket_id] = test_label_set[bucket_id][
                start_idx:start_idx + actual_data_len]
            encoder_inputs, decoder_inputs, eval_labels, target_weights = (
                model.get_batch(tmp_data, bucket_id, label_states))
            _, eval_loss, eval_acc_sup, output_logits = model.step(
                sess,
                encoder_inputs,
                decoder_inputs,
                target_weights,
                bucket_id,
                test_writer,
                forward_only=True,
                output_encoder_states=False,
                encoder_labels=eval_labels)
            if eval_acc_sup is not None:
                if num_prop == 1:
                    for idx in eval_acc_sup:
                        acms.get(idx, bucket_id).accumulate(
                            actual_data_len, eval_acc_sup[idx])
                else:
                    for i in range(num_prop):
                        for idx in eval_acc_sup[i]:
                            acms[i].get(idx, bucket_id).accumulate(
                                actual_data_len, eval_acc_sup[i][idx])
            if eval_loss is not None:
                if num_prop == 1:
                    acms.get("eval_loss", bucket_id).accumulate(
                        actual_data_len, eval_loss)
                else:
                    acms[0].get("eval_loss", bucket_id).accumulate(
                        actual_data_len, eval_loss)
            input_ph, output_ph, em_acc_op, summary_op = model.test_summary_ops[
                bucket_id]
            em_acc, summary = sess.run(
                [em_acc_op, summary_op],
                feed_dict={
                    input_ph: np.array(encoder_inputs),
                    output_ph: np.array(output_logits)
                })
            if em_acc is not None:
                if num_prop == 1:
                    acms.get("em_acc", bucket_id).accumulate(
                        actual_data_len, em_acc)
                else:
                    acms[0].get("em_acc", bucket_id).accumulate(
                        actual_data_len, em_acc)
        if num_prop == 1:
            eval_ppx = (math.exp(float(acms.get("eval_loss", bucket_id).value))
                        if eval_loss < 300 else float("inf"))
        else:
            eval_ppx = (math.exp(
                float(acms[0].get("eval_loss", bucket_id).value))
                        if eval_loss < 300 else float("inf"))
        logging.info("  eval: bucket %d perplexity %.6f" %
                     (bucket_id, eval_ppx))
        if num_prop == 1:
            logging.info("  eval: " + ",".join([
                "%s %.6e " % (key, val[bucket_id].value)
                for key, val in acms.acumulators.items()
            ]))
        else:
            for i in range(num_prop):
                logging.info("  eval: Property(%d) " % (i + 1) + ",".join([
                    "%s %.6e " % (key, val[bucket_id].value)
                    for key, val in acms[i].acumulators.items()
                ]))
    # Add summary and calculate the overall evaluation metrics.
    if num_prop == 1:
        overall_acms = add_eval_summary(test_writer, model.global_step.eval(),
                                        acms.acumulators)
        logging.info("  eval: overall " + ", ".join(
            ["%s %.4e" % (k, v.value) for k, v in overall_acms.items()]))
    else:
        overall_acms = []
        for i in range(num_prop):
            overall_acms.append(
                add_eval_summary(test_writer, model.global_step.eval(),
                                 acms[i].acumulators))
            logging.info("  eval: overall Property(%d) " % (i + 1) + ", ".join(
                ["%s %.4e" % (k, v.value) for k, v in overall_acms[i].items()]))
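# Hedged sketch (not from the original repo): AccumulatorWithBuckets is not
# shown in this file, but the evaluation above uses accumulate(count, value)
# and .value as a count-weighted running mean over batches. The class below is
# a hypothetical stand-in that illustrates that contract; the real class also
# keys accumulators by metric name and bucket id via .get(key, bucket_id).
class _MeanAccumulatorSketch(object):
    """Illustrative only: count-weighted mean mirroring accumulate()/.value."""

    def __init__(self):
        self._total = 0.0
        self._count = 0

    def accumulate(self, count, value):
        # `value` is assumed to be a per-batch mean; weight it by batch size.
        self._total += count * value
        self._count += count

    @property
    def value(self):
        return self._total / self._count if self._count else 0.0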
def __init__(  # pylint: disable=too-many-locals, too-many-arguments, too-many-branches, super-init-not-called, too-many-statements
        self,
        hparams,
        forward_only=False,
        num_samples=512,
        dtype=tf.float32):
    """Create the model.

    Args:
        hparams: Hyperparameters used to construct the neural network.
        num_samples: number of samples for sampled softmax.
        forward_only: if set, we do not construct the backward pass in the model.
        dtype: the data type to use to store internal variables.
    """
    self.hparams = hparams
    self.source_vocab_size = hparams.source_vocab_size
    self.target_vocab_size = hparams.target_vocab_size
    self.buckets = hparams.buckets
    self.size = hparams.size
    self.num_layers = hparams.num_layers
    self.max_gradient_norm = hparams.max_gradient_norm
    self.batch_size = hparams.batch_size
    self.learning_rate = hparams.learning_rate
    self.learning_rate_decay_factor = hparams.learning_rate_decay_factor
    self.learning_rate_op = tf.Variable(float(self.learning_rate),
                                        trainable=False,
                                        dtype=dtype)
    self.learning_rate_decay_op = self.learning_rate_op.assign(
        self.learning_rate_op * hparams.learning_rate_decay_factor)
    self.dropout_rate = hparams.dropout_rate
    self.label_states = hparams.label_states
    self.alpha = hparams.alpha  # Coefficient for the combined loss function.
    self.global_step = tf.Variable(0, trainable=False)
    self.reg = hparams.reg
    self.num_prop = hparams.num_prop
    logging.info("Initializing model with hparams: %s" %
                 str(self.hparams.to_json()))
    size = hparams.size
    buckets = hparams.buckets
    dropout_rate = hparams.dropout_rate
    num_layers = hparams.num_layers

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.target_vocab_size:
        w_t = tf.get_variable("proj_w",
                              [self.target_vocab_size, hparams.size],
                              dtype=dtype)
        w = tf.transpose(w_t)
        b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
        output_projection = (w, b)

        def sampled_loss(labels, logits):
            """Sampled softmax loss function."""
            labels = tf.reshape(labels, [-1, 1])
            # We need to compute the sampled_softmax_loss using 32bit floats to
            # avoid numerical instabilities.
            local_w_t = tf.cast(w_t, tf.float32)
            local_b = tf.cast(b, tf.float32)
            local_inputs = tf.cast(logits, tf.float32)
            return tf.cast(
                tf.nn.sampled_softmax_loss(local_w_t, local_b, labels,
                                           local_inputs, num_samples,
                                           self.target_vocab_size), dtype)

        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    def single_cell():
        """Build a single RNN cell wrapped with dropout."""
        cell_cls_name = "%sCell" % hparams.rnn_cell
        cell_cls = getattr(tf.contrib.rnn, cell_cls_name)
        ret = cell_cls(hparams.size)
        ret = tf.nn.rnn_cell.DropoutWrapper(ret,
                                            input_keep_prob=dropout_rate,
                                            output_keep_prob=dropout_rate)
        return ret

    self._fp_tensors = []

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        """Sequence to sequence function."""
        cell = single_cell()
        if num_layers > 1:
            cell = tf.contrib.rnn.MultiRNNCell(
                [single_cell() for _ in range(num_layers)])
        outputs, encoder_state, decoder_state = embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=hparams.source_vocab_size,
            num_decoder_symbols=hparams.target_vocab_size,
            embedding_size=size,
            output_projection=output_projection,
            feed_previous=do_decode,
            dtype=dtype)
        self._fp_tensors.append(encoder_state)
        return outputs, decoder_state

    def pred_net(bucket_id, encoder_labels):
        """Build the prediction network."""
        fp_tensor = self.get_fingerprint_tensor(bucket_id)
        # Prediction network definition.
        pred_net_cls = getattr(pred_models, hparams.pred_net_type)
        if self.num_prop == 1:
            pred = pred_net_cls(hparams)(fp_tensor, reuse=(bucket_id > 0))
        elif self.num_prop > 1:
            pred_mprop = pred_net_cls(hparams)(fp_tensor,
                                               reuse=(bucket_id > 0))
            pred = pred_mprop[0]
        # Prediction loss.
        loss_cls = getattr(losses, hparams.loss_type)
        loss_sup = loss_cls(hparams)(
            input_tensor=pred if self.num_prop == 1 else pred_mprop,
            label_tensor=encoder_labels)
        # Metrics.
        metric_cls = getattr(metrics, hparams.metric_type)
        if self.num_prop == 1:
            metric_ops = metric_cls(hparams)(input_tensor=pred,
                                             label_tensor=encoder_labels)
        elif self.num_prop > 1:
            metric_ops_mprop = []
            pred_mprop = tf.transpose(pred_mprop, [1, 0])
            for i in range(self.num_prop):
                metric_ops_mprop.append(
                    metric_cls(hparams)(input_tensor=pred_mprop[i],
                                        label_tensor=encoder_labels[i]))
            metric_ops = metric_ops_mprop
        return pred, loss_sup, metric_ops

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    self.encoder_labels = []
    for i in range(buckets[-1][0]):  # Last bucket is the biggest one.
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="encoder{0}".format(i)))
    if self.label_states:
        if self.reg and self.num_prop > 1:
            for i in range(self.num_prop):
                self.encoder_labels.append(
                    tf.placeholder(tf.float32,
                                   shape=[None],
                                   name="label{0}".format(i)))
        else:
            self.encoder_labels.append(
                tf.placeholder(tf.float32 if self.reg else tf.int32,
                               shape=[None],
                               name="label{0}".format(0)))
    for i in range(buckets[-1][1] + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None],
                           name="decoder{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(dtype, shape=[None],
                           name="weight{0}".format(i)))

    # Our targets are decoder inputs shifted by one.
    targets = [
        self.decoder_inputs[i + 1]
        for i in range(len(self.decoder_inputs) - 1)
    ]

    # Training outputs and losses.
    if forward_only:
        self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
            self.encoder_inputs,
            self.decoder_inputs,
            targets,
            self.target_weights,
            buckets,
            lambda x, y: seq2seq_f(x, y, True),
            softmax_loss_function=softmax_loss_function)
        # If we use output projection, we need to project outputs for decoding.
        if output_projection is not None:
            for b in range(len(buckets)):
                self.outputs[b] = [
                    tf.matmul(output, output_projection[0]) +
                    output_projection[1] for output in self.outputs[b]
                ]
    else:
        self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
            self.encoder_inputs,
            self.decoder_inputs,
            targets,
            self.target_weights,
            buckets,
            lambda x, y: seq2seq_f(x, y, False),
            softmax_loss_function=softmax_loss_function)

    if self.label_states:
        self.loss_supervised = [None] * len(buckets)
        self.pred = [None] * len(buckets)
        self.sup_metrics = [None] * len(buckets)
        for bucket_id in range(len(buckets)):
            self.pred[bucket_id], self.loss_supervised[bucket_id], \
                self.sup_metrics[bucket_id] = (
                    pred_net(bucket_id, self.encoder_labels))

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    self.summary_ops = []
    self.test_summary_ops = []
    # TODO(zhengxu): This is a workaround to avoid test summary initialization
    # from the train script.
    # Append test summaries.
    self.test_summary_ops = [
        self.get_em_acc_op(bucket_id) for bucket_id in range(len(buckets))
    ]
    if not forward_only:
        self.gradient_norms = []
        self.updates = []
        lr_summary_op = tf.summary.scalar("learning_rate",
                                          self.learning_rate_op)
        opt = tf.train.GradientDescentOptimizer(self.learning_rate_op)
        for b in range(len(buckets)):
            loss = self.losses[b] if hparams.use_recovery else 0.
            if self.label_states:
                loss = ((1.0 - self.alpha) * loss +
                        self.alpha * self.loss_supervised[b])
            gradients = tf.gradients(loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, hparams.max_gradient_norm)
            self.gradient_norms.append(norm)
            self.updates.append(
                opt.apply_gradients(zip(clipped_gradients, params),
                                    global_step=self.global_step))
            bucket_summary_ops = [
                # Global norm in each bucket.
                tf.summary.scalar("global_norm_%d" % b, norm),
                # Unsupervised (recovery) loss in each bucket.
                tf.summary.scalar("loss_unsup_%d" % b, self.losses[b]),
                # Learning rate summary op.
                lr_summary_op
            ]
            if self.label_states:
                bucket_summary_ops.append(([
                    # Supervised (classification) loss.
                    tf.summary.scalar("loss_sup_%d" % b,
                                      self.loss_supervised[b]),
                    # Total loss (multi-task loss).
                    tf.summary.scalar("total_loss_%d" % b, loss)
                ] + [
                    # Supervised task evaluation metrics.
                    tf.summary.scalar("%s_%d" % (k, b), v)
                    for k, v in self.sup_metrics[b].items()
                ]) if self.num_prop == 1 else [
                    tf.summary.scalar("%s_%d_%d" % (k, b, i), v)
                    for i in range(self.num_prop)
                    for k, v in self.sup_metrics[b][i].items()
                ])
            self.summary_ops.append(tf.summary.merge(bucket_summary_ops))
    variables_to_restore = [
        v for v in tf.global_variables() if v.name.split('/')[0] != 'pred'
    ]
    self.saver_sup = tf.train.Saver(tf.global_variables(),
                                    save_relative_paths=True)
    self.saver_unsup = tf.train.Saver(variables_to_restore,
                                      save_relative_paths=True)