def disp_model_info():
    with tf.Graph().as_default():
        # Dummy placeholders for an arbitrary number of 1-d inputs and outputs
        inputs = tf.placeholder(tf.float32, shape=(None, 1))
        outputs = tf.placeholder(tf.float32, shape=(None, 1))

        # Build the model
        predictions, end_points = regression_model(inputs)

        # Print the name and shape of each tensor.
        print("Layers")
        for k, v in end_points.items():
            print('name = {}, shape = {}'.format(v.name, v.get_shape()))

        # Print the name and shape of parameter nodes (values not yet initialized).
        print("\n")
        print("Parameters")
        for v in slim.get_model_variables():
            print('name = {}, shape = {}'.format(v.name, v.get_shape()))

        print("\n")
        print("Local Parameters")
        for v in slim.get_local_variables():
            print('name = {}, shape = {}'.format(v.name, v.get_shape()))
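# A minimal sketch of the kind of `regression_model` that disp_model_info() above
# expects: a callable returning (predictions, end_points). The layer names and
# sizes here are illustrative assumptions, not the repository's actual model.
def regression_model_sketch(inputs, scope="regression"):
    end_points = {}
    with tf.variable_scope(scope):
        net = slim.fully_connected(inputs, 32, scope='fc1')
        end_points['fc1'] = net
        predictions = slim.fully_connected(net, 1, activation_fn=None,
                                           scope='prediction')
        end_points['prediction'] = predictions
    return predictions, end_points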
def metric_def(self):
    self.training_metrics = {
        "learning_rate": self.learning_rate,
        "adam_beta_1": self.adam_beta_1,
        "adam_beta_2": self.adam_beta_2,
        "ema_decay": self.ema_decay,
        "cons_coefficient": self.cons_coefficient,
        "train/error/1": self.mean_error_1,
        "train/error/ema": self.mean_error_ema,
        "train/class_cost/1": self.mean_class_cost_1,
        "train/class_cost/ema": self.mean_class_cost_ema,
        "train/cons_cost/mt": self.mean_cons_cost_mt,
        "train/total_cost/mt": self.mean_total_cost_mt,
    }

    with tf.variable_scope("validation_metrics") as metrics_scope:
        self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
            "eval/error/1": streaming_mean(self.errors_1),
            "eval/error/ema": streaming_mean(self.errors_ema),
            "eval/class_cost/1": streaming_mean(self.class_costs_1),
            "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
        })
        metric_variables = slim.get_local_variables(scope=metrics_scope.name)
        self.metric_init_op = tf.variables_initializer(metric_variables)

    self.result_formatter = string_utils.DictFormatter(
        order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
        default_format='{name}: {value:>10.6f}',
        separator=", ")
    self.result_formatter.add_format('error', '{name}: {value:>6.1%}')
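# Sketch of the `streaming_mean` helper assumed by the aggregate_metric_map calls
# above. aggregate_metric_map expects (value_tensor, update_op) pairs, which
# tf.metrics.mean provides directly; this is inferred from usage rather than
# copied from the repository's actual helper.
def streaming_mean_sketch(values):
    return tf.metrics.mean(values)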
def __init__(self, run_context=None):
    if run_context is not None:
        self.training_log = run_context.create_train_log('training')
        self.validation_log = run_context.create_train_log('validation')
        self.checkpoint_path = os.path.join(run_context.transient_dir, 'checkpoint')
        self.tensorboard_path = os.path.join(run_context.result_dir, 'tensorboard')

    with tf.name_scope("placeholders"):
        self.images = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 3), name='images')
        self.labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
        self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    tf.add_to_collection("init_in_init", self.global_step)
    self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
    for var in self.hyper.variables.values():
        tf.add_to_collection("init_in_init", var)

    with tf.name_scope("ramps"):
        sigmoid_rampup_value = sigmoid_rampup(self.global_step, self.hyper['rampup_length'])
        sigmoid_rampdown_value = sigmoid_rampdown(self.global_step,
                                                  self.hyper['rampdown_length'],
                                                  self.hyper['training_length'])
        self.learning_rate = tf.multiply(sigmoid_rampup_value * sigmoid_rampdown_value,
                                         self.hyper['max_learning_rate'],
                                         name='learning_rate')
        self.adam_beta_1 = tf.add(sigmoid_rampdown_value * self.hyper['adam_beta_1_before_rampdown'],
                                  (1 - sigmoid_rampdown_value) * self.hyper['adam_beta_1_after_rampdown'],
                                  name='adam_beta_1')
        self.cons_coefficient = tf.multiply(sigmoid_rampup_value,
                                            self.hyper['max_consistency_cost'],
                                            name='consistency_coefficient')
        step_rampup_value = step_rampup(self.global_step, self.hyper['rampup_length'])
        self.adam_beta_2 = tf.add((1 - step_rampup_value) * self.hyper['adam_beta_2_during_rampup'],
                                  step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                                  name='adam_beta_2')
        self.ema_decay = tf.add((1 - step_rampup_value) * self.hyper['ema_decay_during_rampup'],
                                step_rampup_value * self.hyper['ema_decay_after_rampup'],
                                name='ema_decay')

    ((self.class_logits_1, self.cons_logits_1),
     (self.class_logits_2, self.cons_logits_2),
     (self.class_logits_ema, self.cons_logits_ema)) = inference(
        self.images,
        is_training=self.is_training,
        ema_decay=self.ema_decay,
        input_noise=self.hyper['input_noise'],
        student_dropout_probability=self.hyper['student_dropout_probability'],
        teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
        normalize_input=self.hyper['normalize_input'],
        flip_horizontally=self.hyper['flip_horizontally'],
        translate=self.hyper['translate'],
        num_logits=self.hyper['num_logits'])

    with tf.name_scope("objectives"):
        self.mean_error_1, self.errors_1 = errors(self.class_logits_1, self.labels)
        self.mean_error_ema, self.errors_ema = errors(self.class_logits_ema, self.labels)

        self.mean_class_cost_1, self.class_costs_1 = classification_costs(
            self.class_logits_1, self.labels)
        self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
            self.class_logits_ema, self.labels)

        labeled_consistency = self.hyper['apply_consistency_to_labeled']
        consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
        self.mean_cons_cost_pi, self.cons_costs_pi = consistency_costs(
            self.cons_logits_1, self.class_logits_2, self.cons_coefficient,
            consistency_mask, self.hyper['consistency_trust'])
        self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
            self.cons_logits_1, self.class_logits_ema, self.cons_coefficient,
            consistency_mask, self.hyper['consistency_trust'])

        def l2_norms(matrix):
            l2s = tf.reduce_sum(matrix ** 2, axis=1)
            mean_l2 = tf.reduce_mean(l2s)
            return mean_l2, l2s

        self.mean_res_l2_1, self.res_l2s_1 = l2_norms(self.class_logits_1 - self.cons_logits_1)
        self.mean_res_l2_ema, self.res_l2s_ema = l2_norms(self.class_logits_ema - self.cons_logits_ema)
        self.res_costs_1 = self.hyper['logit_distance_cost'] * self.res_l2s_1
        self.mean_res_cost_1 = tf.reduce_mean(self.res_costs_1)
        self.res_costs_ema = self.hyper['logit_distance_cost'] * self.res_l2s_ema
        self.mean_res_cost_ema = tf.reduce_mean(self.res_costs_ema)

        self.mean_total_cost_pi, self.total_costs_pi = total_costs(
            self.class_costs_1, self.cons_costs_pi, self.res_costs_1)
        self.mean_total_cost_mt, self.total_costs_mt = total_costs(
            self.class_costs_1, self.cons_costs_mt, self.res_costs_1)
        assert_shape(self.total_costs_pi, [3])
        assert_shape(self.total_costs_mt, [3])

        self.cost_to_be_minimized = tf.cond(self.hyper['ema_consistency'],
                                            lambda: self.mean_total_cost_mt,
                                            lambda: self.mean_total_cost_pi)

    with tf.name_scope("train_step"):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step_op = nn.adam_optimizer(self.cost_to_be_minimized,
                                                   self.global_step,
                                                   learning_rate=self.learning_rate,
                                                   beta1=self.adam_beta_1,
                                                   beta2=self.adam_beta_2,
                                                   epsilon=self.hyper['adam_epsilon'])

    self.training_control = training_control(self.global_step,
                                             self.hyper['print_span'],
                                             self.hyper['evaluation_span'],
                                             self.hyper['training_length'])

    self.training_metrics = {
        "learning_rate": self.learning_rate,
        "adam_beta_1": self.adam_beta_1,
        "adam_beta_2": self.adam_beta_2,
        "ema_decay": self.ema_decay,
        "cons_coefficient": self.cons_coefficient,
        "train/error/1": self.mean_error_1,
        "train/error/ema": self.mean_error_ema,
        "train/class_cost/1": self.mean_class_cost_1,
        "train/class_cost/ema": self.mean_class_cost_ema,
        "train/cons_cost/pi": self.mean_cons_cost_pi,
        "train/cons_cost/mt": self.mean_cons_cost_mt,
        "train/res_cost/1": self.mean_res_cost_1,
        "train/res_cost/ema": self.mean_res_cost_ema,
        "train/total_cost/pi": self.mean_total_cost_pi,
        "train/total_cost/mt": self.mean_total_cost_mt,
    }

    with tf.variable_scope("validation_metrics") as metrics_scope:
        self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
            "eval/error/1": streaming_mean(self.errors_1),
            "eval/error/ema": streaming_mean(self.errors_ema),
            "eval/class_cost/1": streaming_mean(self.class_costs_1),
            "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
            "eval/res_cost/1": streaming_mean(self.res_costs_1),
            "eval/res_cost/ema": streaming_mean(self.res_costs_ema),
        })
        metric_variables = slim.get_local_variables(scope=metrics_scope.name)
        self.metric_init_op = tf.variables_initializer(metric_variables)

    self.result_formatter = string_utils.DictFormatter(
        order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
        default_format='{name}: {value:>10.6f}',
        separator=", ")
    self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

    with tf.name_scope("initializers"):
        init_init_variables = tf.get_collection("init_in_init")
        train_init_variables = [
            var for var in tf.global_variables()
            if var not in init_init_variables
        ]
        self.init_init_op = tf.variables_initializer(init_init_variables)
        self.train_init_op = tf.variables_initializer(train_init_variables)

    self.saver = tf.train.Saver()
    self.session = tf.Session()
    self.run(self.init_init_op)
def __init__(self, run_context=None, hyper_dict=None):
    # Initialize hyperparameters (avoid a mutable default argument).
    if hyper_dict is None:
        hyper_dict = {}
    for key in hyper_dict:
        assert key in self.hyper, "Unknown hyperparameter key '{}'!".format(key)
        self.hyper[key] = hyper_dict[key]

    # Initialize the background-noise input.
    if self.hyper['bg_noise']:
        self.bg_noise_input = tf.convert_to_tensor(self.hyper['bg_noise_input'],
                                                   dtype=tf.float32)
    else:
        self.bg_noise_input = tf.convert_to_tensor(np.zeros((32, 32)),
                                                   dtype=tf.float32)

    # Initialize the model.
    print('{} is initialized!'.format(self.hyper['cnn']))
    self.cnn = getattr(model, self.hyper['cnn'])

    if run_context is not None:
        self.training_log = run_context.create_train_log('training')
        self.validation_log = run_context.create_train_log('validation')
        self.checkpoint_path = os.path.join(run_context.transient_dir, 'checkpoint')
        self.tensorboard_path = os.path.join(run_context.result_dir, 'tensorboard')

    with tf.name_scope("placeholders"):
        self.images = tf.placeholder(dtype=tf.float32,
                                     shape=(None,) + self.hyper['input_dim'],
                                     name='images')
        self.labels = tf.placeholder(dtype=tf.int32,
                                     shape=(None,) + self.hyper['label_dim'],
                                     name='labels')
        self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    tf.add_to_collection("init_in_init", self.global_step)

    with tf.name_scope("ramps"):
        (self.learning_rate, self.cons_coefficient,
         self.adam_beta_1, self.adam_beta_2,
         self.ema_decay) = ramp_value(self.global_step, self.hyper)

    (self.class_logits_1, self.class_logits_ema) = self.inference(
        self.images,
        is_training=self.is_training,
        ema_decay=self.ema_decay,
        input_noise=self.hyper['input_noise'],
        student_dropout_probability=self.hyper['student_dropout_probability'],
        teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
        normalize_input=self.hyper['normalize_input'],
        flip_horizontally=self.hyper['flip_horizontally'],
        translate=self.hyper['translate'])

    with tf.name_scope("objectives"):
        self.mean_error_1, self.errors_1 = errors(
            self.class_logits_1, self.labels, sig=self.hyper['sig'])
        self.mean_error_ema, self.errors_ema = errors(
            self.class_logits_ema, self.labels, sig=self.hyper['sig'])
        self.mean_class_cost_1, self.class_costs_1 = classification_costs(
            self.class_logits_1, self.labels, sig=self.hyper['sig'])
        self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
            self.class_logits_ema, self.labels, sig=self.hyper['sig'])

        labeled_consistency = self.hyper['apply_consistency_to_labeled']
        consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
        self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
            self.class_logits_1, self.class_logits_ema, self.cons_coefficient,
            consistency_mask, self.hyper['consistency_trust'], sig='softmax')

        self.mean_total_cost_mt, self.total_costs_mt = total_costs(
            self.class_costs_1, self.cons_costs_mt)
        self.cost_to_be_minimized = self.mean_total_cost_mt

    with tf.name_scope("train_step"):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if self.hyper['optimizer'] == 'adam':
                self.train_step_op = nn.adam_optimizer(self.cost_to_be_minimized,
                                                       self.global_step,
                                                       learning_rate=self.learning_rate,
                                                       beta1=self.adam_beta_1,
                                                       beta2=self.adam_beta_2,
                                                       epsilon=self.hyper['adam_epsilon'])
            elif self.hyper['optimizer'] == 'sgd':
                self.train_step_op = nn.sgd_optimizer(self.cost_to_be_minimized,
                                                      self.global_step,
                                                      learning_rate=self.hyper['max_learning_rate'])
            else:
                raise ValueError("Unknown optimizer '{}'!".format(self.hyper['optimizer']))

    self.training_metrics = {
        "learning_rate": self.learning_rate,
        "adam_beta_1": self.adam_beta_1,
        "adam_beta_2": self.adam_beta_2,
        "ema_decay": self.ema_decay,
        "cons_coefficient": self.cons_coefficient,
        "train/error/1": self.mean_error_1,
        "train/error/ema": self.mean_error_ema,
        "train/class_cost/1": self.mean_class_cost_1,
        "train/class_cost/ema": self.mean_class_cost_ema,
        "train/cons_cost/mt": self.mean_cons_cost_mt,
        "train/total_cost/mt": self.mean_total_cost_mt,
    }

    with tf.variable_scope("validation_metrics") as metrics_scope:
        self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
            "eval/error/1": streaming_mean(self.errors_1),
            "eval/error/ema": streaming_mean(self.errors_ema),
            "eval/class_cost/1": streaming_mean(self.class_costs_1),
            "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
        })
        metric_variables = slim.get_local_variables(scope=metrics_scope.name)
        self.metric_init_op = tf.variables_initializer(metric_variables)

    self.result_formatter = string_utils.DictFormatter(
        order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
        default_format='{name}: {value:>10.6f}',
        separator=", ")
    self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

    with tf.name_scope("initializers"):
        init_init_variables = tf.get_collection("init_in_init")
        train_init_variables = [
            var for var in tf.global_variables()
            if var not in init_init_variables
        ]
        self.init_init_op = tf.variables_initializer(init_init_variables)
        self.train_init_op = tf.variables_initializer(train_init_variables)

    self.saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.session = tf.Session(config=config)
    self.run(self.init_init_op)
    self.save_tensorboard_graph()
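# Usage sketch for the constructor above. The class name `MeanTeacherModel` is a
# placeholder assumption; the hyperparameter keys passed in must already exist in
# self.hyper, otherwise the assert at the top of __init__ fires.
# model = MeanTeacherModel(run_context=None,
#                          hyper_dict={'optimizer': 'adam', 'bg_noise': False})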
def __init__(self, result_dir):
    self.checkpoint_dir = os.path.join(result_dir, 'checkpoints')
    self.summary_dir = os.path.join(result_dir, 'summaries')
    os.makedirs(self.checkpoint_dir)
    os.makedirs(self.summary_dir)

    with tf.name_scope("placeholders"):
        self.images = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 3), name='images')
        self.labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
        self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    tf.add_to_collection("init_in_init", self.global_step)
    self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
    for var in self.hyper.variables.values():
        tf.add_to_collection("init_in_init", var)

    with tf.name_scope("ramps"):
        sigmoid_rampup_value = sigmoid_rampup(self.global_step, self.hyper['rampup_length'])
        sigmoid_rampdown_value = sigmoid_rampdown(self.global_step,
                                                  self.hyper['rampdown_length'],
                                                  self.hyper['training_length'])
        self.learning_rate = tf.multiply(sigmoid_rampup_value * sigmoid_rampdown_value,
                                         self.hyper['max_learning_rate'],
                                         name='learning_rate')
        self.adam_beta_1 = tf.add(sigmoid_rampdown_value * self.hyper['adam_beta_1_before_rampdown'],
                                  (1 - sigmoid_rampdown_value) * self.hyper['adam_beta_1_after_rampdown'],
                                  name='adam_beta_1')
        self.cons_coefficient = tf.multiply(sigmoid_rampup_value,
                                            self.hyper['max_consistency_coefficient'],
                                            name='consistency_coefficient')
        step_rampup_value = step_rampup(self.global_step, self.hyper['rampup_length'])
        self.adam_beta_2 = tf.add((1 - step_rampup_value) * self.hyper['adam_beta_2_during_rampup'],
                                  step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                                  name='adam_beta_2')
        self.ema_decay = tf.add((1 - step_rampup_value) * self.hyper['ema_decay_during_rampup'],
                                step_rampup_value * self.hyper['ema_decay_after_rampup'],
                                name='ema_decay')

    self.logits_1, self.logits_2, self.logits_ema = inference(
        self.images,
        is_training=self.is_training,
        ema_decay=self.ema_decay,
        normalize_input=self.hyper['normalize_input'],
        flip_horizontally=self.hyper['flip_horizontally'])

    with tf.name_scope("objectives"):
        self.mean_error_1, self.errors_1 = errors(self.logits_1, self.labels)
        self.mean_error_ema, self.errors_ema = errors(self.logits_ema, self.labels)

        self.mean_class_cost_1, self.class_costs_1 = classification_costs(
            self.logits_1, self.labels)
        self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
            self.logits_ema, self.labels)

        labeled_consistency = self.hyper['apply_consistency_to_labeled']
        consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
        self.mean_cons_cost_pi, self.cons_costs_pi = consistency_costs(
            self.logits_1, self.logits_2, self.cons_coefficient, consistency_mask)
        self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
            self.logits_1, self.logits_ema, self.cons_coefficient, consistency_mask)

        self.mean_total_cost_pi, self.total_costs_pi = total_costs(
            self.class_costs_1, self.cons_costs_pi)
        self.mean_total_cost_mt, self.total_costs_mt = total_costs(
            self.class_costs_1, self.cons_costs_mt)

        self.cost_to_be_minimized = tf.cond(self.hyper['ema_consistency'],
                                            lambda: self.mean_total_cost_mt,
                                            lambda: self.mean_total_cost_pi)

    with tf.name_scope("train_step"):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step_op = nn.adam_optimizer(self.cost_to_be_minimized,
                                                   self.global_step,
                                                   learning_rate=self.learning_rate,
                                                   beta1=self.adam_beta_1,
                                                   beta2=self.adam_beta_2,
                                                   epsilon=self.hyper['adam_epsilon'])

    self.training_control = training_control(self.global_step,
                                             self.hyper['print_span'],
                                             self.hyper['evaluation_span'],
                                             self.hyper['training_length'])

    self.training_metrics = {
        "learning_rate": self.learning_rate,
        "adam_beta_1": self.adam_beta_1,
        "adam_beta_2": self.adam_beta_2,
        "ema_decay": self.ema_decay,
        "cons_coefficient": self.cons_coefficient,
        "train/error/1": self.mean_error_1,
        "train/error/ema": self.mean_error_ema,
        "train/class_cost/1": self.mean_class_cost_1,
        "train/class_cost/ema": self.mean_class_cost_ema,
        "train/cons_cost/pi": self.mean_cons_cost_pi,
        "train/cons_cost/mt": self.mean_cons_cost_mt,
        "train/total_cost/pi": self.mean_total_cost_pi,
        "train/total_cost/mt": self.mean_total_cost_mt,
    }

    with tf.variable_scope("validation_metrics") as metrics_scope:
        self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
            "eval/error/1": streaming_mean(self.errors_1),
            "eval/error/ema": streaming_mean(self.errors_ema),
            # Note that the evaluation costs are not directly comparable to the
            # training costs. Training batches contain unlabeled samples but the
            # evaluation batches do not. Because classification cost is zero for
            # unlabeled samples, the training costs are smaller than evaluation
            # costs when doing semi-supervised learning.
            "eval/class_cost/1": streaming_mean(self.class_costs_1),
            "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
            "eval/cons_cost/pi": streaming_mean(self.cons_costs_pi),
            "eval/cons_cost/mt": streaming_mean(self.cons_costs_mt),
            "eval/total_cost/pi": streaming_mean(self.total_costs_pi),
            "eval/total_cost/mt": streaming_mean(self.total_costs_mt),
        })
        metric_variables = slim.get_local_variables(scope=metrics_scope.name)
        self.metric_init_op = tf.variables_initializer(metric_variables)

    with tf.name_scope("initializers"):
        init_init_variables = tf.get_collection("init_in_init")
        train_init_variables = [
            var for var in tf.global_variables()
            if var not in init_init_variables
        ]
        self.init_init_op = tf.variables_initializer(init_init_variables)
        self.train_init_op = tf.variables_initializer(train_init_variables)

    self.saver = tf.train.Saver()
    self.session = tf.Session()
    self.run(self.init_init_op)
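# Hedged sketch of `classification_costs` as used above: cross-entropy is applied
# only to labeled examples (label == -1 marks unlabeled), and unlabeled samples
# contribute zero cost, which is why semi-supervised training costs run lower
# than evaluation costs (see the comment in the metric map above). Inferred from
# usage, not copied from the repository.
def classification_costs_sketch(logits, labels):
    applicable = tf.not_equal(labels, -1)
    # Replace the -1 markers with a valid class index; their costs are zeroed below.
    safe_labels = tf.where(applicable, labels, tf.zeros_like(labels))
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=safe_labels, logits=logits)
    per_sample = tf.where(applicable, per_sample, tf.zeros_like(per_sample))
    # Mean over the whole batch, so unlabeled samples dilute the cost.
    return tf.reduce_mean(per_sample), per_sample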
def __init__(
    self,
    config,
    output_dir="./output",
    use_rnn=False,
    testing=False,
    use_best=False,
):
    self.config = config
    self.output_dir = output_dir
    self.checkpoint_path = os.path.join(self.output_dir, "checkpoint")
    self.best_ckpt_path = os.path.join(self.output_dir, "best_ckpt")
    self.weights_path = os.path.join(self.output_dir, "weights")
    self.log_dir = os.path.join(self.output_dir, "log")
    self.use_rnn = use_rnn

    # Placeholders
    with tf.variable_scope("placeholders") as scope:
        self.signals = tf.placeholder(dtype=tf.float32,
                                      shape=(None, self.config["input_size"], 1, 1),
                                      name='signals')
        self.labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
        self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')
        if self.use_rnn:
            self.loss_weights = tf.placeholder(dtype=tf.float32, shape=(None,),
                                               name='loss_weights')
            self.seq_lengths = tf.placeholder(dtype=tf.int32, shape=(None,),
                                              name='seq_lengths')

    # Monitor global step updates
    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    # Monitor the number of epochs passed
    self.global_epoch = tf.Variable(0, trainable=False, name='global_epoch')

    # Build a network that receives inputs from the placeholders
    net = self.build_cnn()

    if self.use_rnn:
        # Check whether the corresponding config is given
        if "n_rnn_layers" not in self.config:
            raise Exception("Invalid config.")
        # Append the RNN if needed
        net = self.append_rnn(net)

    # Softmax linear
    net = nn.fc("softmax_linear", net, self.config["n_classes"], bias=0.0)

    # Outputs
    self.logits = net
    self.preds = tf.argmax(self.logits, axis=1)

    # Cross-entropy loss
    self.loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.labels, logits=self.logits, name="loss_ce_per_sample")

    with tf.name_scope("loss_ce_mean") as scope:
        if self.use_rnn:
            # Weight by sequence
            loss_w_seq = tf.multiply(self.loss_weights, self.loss_per_sample)
            # Weight by class
            sample_weights = tf.reduce_sum(
                tf.multiply(
                    tf.one_hot(indices=self.labels, depth=self.config["n_classes"]),
                    np.asarray(self.config["class_weights"], dtype=np.float32)), 1)
            loss_w_class = tf.multiply(loss_w_seq, sample_weights)
            # Compute the average loss, scaled by the sequence length
            self.loss_ce = tf.reduce_sum(loss_w_class) / tf.reduce_sum(self.loss_weights)
        else:
            self.loss_ce = tf.reduce_mean(self.loss_per_sample)

    # Regularization loss
    self.reg_losses = self.regularization_loss()

    # Total loss
    self.loss = self.loss_ce + self.reg_losses

    # Metrics (used when we want to compute a metric from minibatch outputs)
    with tf.variable_scope("stream_metrics") as scope:
        self.metric_value_op, self.metric_update_op = contrib_metrics.aggregate_metric_map({
            "loss": tf.metrics.mean(values=self.loss),
            "accuracy": tf.metrics.accuracy(labels=self.labels, predictions=self.preds),
            "precision": tf.metrics.precision(labels=self.labels, predictions=self.preds),
            "recall": tf.metrics.recall(labels=self.labels, predictions=self.preds),
        })
        # Manually create reset operations for the metric local variables
        metric_vars = contrib_slim.get_local_variables(scope=scope.name)
        self.metric_init_op = tf.variables_initializer(metric_vars)

    # Training outputs
    self.train_outputs = {
        "global_step": self.global_step,
        "train/loss": self.loss,
        "train/preds": self.preds,
        "train/stream_metrics": self.metric_update_op,
    }
    if self.use_rnn:
        self.train_outputs.update({
            "train/init_state": self.init_state,
            "train/final_state": self.final_state,
        })

    # Test outputs
    self.test_outputs = {
        "global_step": self.global_step,
        "test/loss": self.loss,
        "test/preds": self.preds,
    }
    if self.use_rnn:
        self.test_outputs.update({
            "test/init_state": self.init_state,
            "test/final_state": self.final_state,
        })

    # TensorFlow session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=config)

    if not testing:
        self.train_writer = tf.summary.FileWriter(os.path.join(self.log_dir, "train"))
        self.train_writer.add_graph(self.sess.graph)
        logger.info("Saved tensorboard graph to {}".format(
            self.train_writer.get_logdir()))

    # Optimizer
    if not testing:
        # self.lr = tf.train.exponential_decay(
        #     learning_rate=self.config["learning_rate_decay"],
        #     global_step=self.global_step,
        #     decay_steps=self.config["decay_steps"],
        #     decay_rate=self.config["decay_rate"],
        #     staircase=False,
        #     name="learning_rate"
        # )
        self.lr = tf.constant(self.config["learning_rate"], dtype=tf.float32)
        with tf.variable_scope("optimizer") as scope:
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # Pretraining
                if not self.use_rnn:
                    self.train_step_op, self.grad_op = nn.adam_optimizer(
                        loss=self.loss,
                        training_variables=tf.trainable_variables(),
                        global_step=self.global_step,
                        # learning_rate=self.config["learning_rate"],
                        learning_rate=self.lr,
                        beta1=self.config["adam_beta_1"],
                        beta2=self.config["adam_beta_2"],
                        epsilon=self.config["adam_epsilon"],
                    )
                # Fine-tuning
                else:
                    # Use different learning rates for CNN and RNN
                    self.train_step_op, self.grad_op = nn.adam_optimizer_clip(
                        loss=self.loss,
                        training_variables=tf.trainable_variables(),
                        global_step=self.global_step,
                        # learning_rate=self.config["learning_rate"],
                        learning_rate=self.lr,
                        beta1=self.config["adam_beta_1"],
                        beta2=self.config["adam_beta_2"],
                        epsilon=self.config["adam_epsilon"],
                        clip_value=self.config["clip_grad_value"],
                    )

    # Initializer
    with tf.variable_scope("initializer") as scope:
        # tf.trainable_variables() or tf.global_variables()
        self.init_global_op = tf.variables_initializer(tf.global_variables())
        self.init_local_op = tf.variables_initializer(tf.local_variables())

    # Savers for storing variables
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    self.best_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    # Initialize variables
    self.run([self.init_global_op, self.init_local_op])

    # Restore variables (if possible)
    is_restore = False
    if use_best:
        if os.path.exists(self.best_ckpt_path):
            if os.path.isfile(os.path.join(self.best_ckpt_path, "checkpoint")):
                # Restore the last checkpoint
                latest_checkpoint = tf.train.latest_checkpoint(self.best_ckpt_path)
                self.saver.restore(self.sess, latest_checkpoint)
                logger.info("Best model restored from {}".format(latest_checkpoint))
                is_restore = True
    else:
        if os.path.exists(self.checkpoint_path):
            if os.path.isfile(os.path.join(self.checkpoint_path, "checkpoint")):
                # Restore the last checkpoint
                latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
                self.saver.restore(self.sess, latest_checkpoint)
                logger.info("Model restored from {}".format(latest_checkpoint))
                is_restore = True
    if not is_restore:
        logger.info("Model started from random weights")
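# Hedged sketch of the `nn.adam_optimizer` wrapper called above, inferred from the
# call site (loss, training_variables, global_step, learning rate, betas, epsilon)
# and the (train_step_op, grad_op) return pair; the real helper may differ, and
# nn.adam_optimizer_clip would additionally clip the gradients before applying.
def adam_optimizer_sketch(loss, training_variables, global_step,
                          learning_rate, beta1, beta2, epsilon):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1,
                                       beta2=beta2, epsilon=epsilon)
    grads_and_vars = optimizer.compute_gradients(loss, var_list=training_variables)
    # Applying the gradients also increments global_step.
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
    return train_op, grads_and_vars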
def __init__(self, run_context=None):
    self.name = "Tweet Data Class"
    if run_context is not None:
        self.training_log = run_context.create_train_log('training')
        self.validation_log = run_context.create_train_log('validation')
        self.checkpoint_path = os.path.join(run_context.transient_dir, 'checkpoint')
        self.tensorboard_path = os.path.join(run_context.result_dir, 'tensorboard')

    with tf.name_scope("placeholders"):
        self.tweets = tf.placeholder(dtype=tf.float32, shape=(None, 500), name='tweets')
        self.labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
        self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    tf.add_to_collection("init_in_init", self.global_step)
    self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
    for var in self.hyper.variables.values():
        tf.add_to_collection("init_in_init", var)

    with tf.name_scope("ramps"):
        # The constant (no ramp) schedules below were an earlier simplification,
        # kept commented out for reference; the sigmoid/step ramp schedule that
        # follows is the active one.
        # self.learning_rate = tf.constant(self.hyper['max_learning_rate'], dtype=tf.float32)
        # self.adam_beta_1 = tf.constant(self.hyper['adam_beta_1_after_rampdown'], dtype=tf.float32)
        # self.cons_coefficient = tf.constant(self.hyper['max_consistency_cost'], dtype=tf.float32)
        # self.adam_beta_2 = tf.constant(self.hyper['adam_beta_2_after_rampup'], dtype=tf.float32)
        # self.ema_decay = tf.constant(self.hyper['ema_decay_after_rampup'], dtype=tf.float32)
        # self.learning_rate = self.DEFAULT_HYPERPARAMS['max_learning_rate']
        # self.adam_beta_1 = self.DEFAULT_HYPERPARAMS['adam_beta_1_after_rampdown']
        # self.cons_coefficient = self.DEFAULT_HYPERPARAMS['max_consistency_cost']
        # self.adam_beta_2 = self.DEFAULT_HYPERPARAMS['adam_beta_2_after_rampup']
        # self.ema_decay = self.DEFAULT_HYPERPARAMS['ema_decay_after_rampup']
        sigmoid_rampup_value = sigmoid_rampup(self.global_step, self.hyper['rampup_length'])
        sigmoid_rampdown_value = sigmoid_rampdown(self.global_step,
                                                  self.hyper['rampdown_length'],
                                                  self.hyper['training_length'])
        self.learning_rate = tf.multiply(sigmoid_rampup_value * sigmoid_rampdown_value,
                                         self.hyper['max_learning_rate'],
                                         name='learning_rate')
        self.adam_beta_1 = tf.add(sigmoid_rampdown_value * self.hyper['adam_beta_1_before_rampdown'],
                                  (1 - sigmoid_rampdown_value) * self.hyper['adam_beta_1_after_rampdown'],
                                  name='adam_beta_1')
        self.cons_coefficient = tf.multiply(sigmoid_rampup_value,
                                            self.hyper['max_consistency_cost'],
                                            name='consistency_coefficient')
        step_rampup_value = step_rampup(self.global_step, self.hyper['rampup_length'])
        self.adam_beta_2 = tf.add((1 - step_rampup_value) * self.hyper['adam_beta_2_during_rampup'],
                                  step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                                  name='adam_beta_2')
        self.ema_decay = tf.add((1 - step_rampup_value) * self.hyper['ema_decay_during_rampup'],
                                step_rampup_value * self.hyper['ema_decay_after_rampup'],
                                name='ema_decay')

    # Below is where the interesting stuff happens, mostly. inference() builds the
    # towers and sets up the different logits for the two models.
    ((self.class_logits_1, self.cons_logits_1),
     (self.class_logits_2, self.cons_logits_2),
     (self.class_logits_ema, self.cons_logits_ema)) = inference(
        self.tweets,
        is_training=self.is_training,
        ema_decay=self.ema_decay,
        input_noise=self.hyper['input_noise'],
        hidden_dims=self.DEFAULT_HYPERPARAMS['hidden_dims'],
        student_dropout_probability=self.hyper['student_dropout_probability'],
        teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
        num_logits=self.hyper['num_logits'])

    with tf.name_scope("objectives"):
        # Errors are only computed for labeled examples; for unlabeled examples
        # the value is NaN.
        self.mean_error_1, self.errors_1 = errors(self.class_logits_1, self.labels)
        self.mean_error_ema, self.errors_ema = errors(self.class_logits_ema, self.labels)

        # Classification costs: cost_1 is for the student, ema is for the teacher.
        self.mean_class_cost_1, self.class_costs_1 = classification_costs(
            self.class_logits_1, self.labels)
        self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
            self.class_logits_ema, self.labels)

        labeled_consistency = self.hyper['apply_consistency_to_labeled']
        consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
        self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
            self.cons_logits_1, self.class_logits_ema, self.cons_coefficient,
            consistency_mask)

        def l2_norms(matrix):
            l2s = tf.reduce_sum(matrix ** 2, axis=1)
            mean_l2 = tf.reduce_mean(l2s)
            return mean_l2, l2s

        self.mean_res_l2_1, self.res_l2s_1 = l2_norms(self.class_logits_1 - self.cons_logits_1)
        self.mean_res_l2_ema, self.res_l2s_ema = l2_norms(self.class_logits_ema - self.cons_logits_ema)

        # The mean total cost is what gets optimized.
        self.mean_total_cost_mt, self.total_costs_mt = total_costs(
            self.class_costs_1, self.cons_costs_mt)
        assert_shape(self.total_costs_mt, [2])

        self.cost_to_be_minimized = self.mean_total_cost_mt

    with tf.name_scope("train_step"):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step_op = adam_optimizer(self.cost_to_be_minimized,
                                                self.global_step,
                                                learning_rate=self.learning_rate,
                                                beta1=self.adam_beta_1,
                                                beta2=self.adam_beta_2,
                                                epsilon=self.hyper['adam_epsilon'])

    # TODO: do we really need this?
    self.training_control = training_control(self.global_step,
                                             self.hyper['print_span'],
                                             self.hyper['evaluation_span'],
                                             self.hyper['training_length'])

    self.training_metrics = {
        # NOTE: these should not need tracking, since we don't do ramp-up and ramp-down.
        "learning_rate": self.learning_rate,
        "adam_beta_1": self.adam_beta_1,
        "adam_beta_2": self.adam_beta_2,
        "ema_decay": self.ema_decay,
        "cons_coefficient": self.cons_coefficient,
        "train/error/1": self.mean_error_1,
        "train/error/ema": self.mean_error_ema,
        "train/class_cost/1": self.mean_class_cost_1,
        "train/class_cost/ema": self.mean_class_cost_ema,
        "train/cons_cost/mt": self.mean_cons_cost_mt,
        "train/total_cost/mt": self.mean_total_cost_mt,
    }

    # streaming_mean keeps a running mean of a metric across evaluation minibatches.
    with tf.variable_scope("validation_metrics") as metrics_scope:
        self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
            "eval/error/1": streaming_mean(self.errors_1),
            "eval/error/ema": streaming_mean(self.errors_ema),
            "eval/class_cost/1": streaming_mean(self.class_costs_1),
            "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
        })
        metric_variables = slim.get_local_variables(scope=metrics_scope.name)
        self.metric_init_op = tf.variables_initializer(metric_variables)

    # string_utils.DictFormatter just formats dictionary results nicely for logging.
    self.result_formatter = string_utils.DictFormatter(
        order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
        default_format='{name}: {value:>10.6f}',
        separator=", ")
    self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

    with tf.name_scope("initializers"):
        init_init_variables = tf.get_collection("init_in_init")
        train_init_variables = [
            var for var in tf.global_variables()
            if var not in init_init_variables
        ]
        self.init_init_op = tf.variables_initializer(init_init_variables)
        print("Train init variables:")
        for var in train_init_variables:
            print(var)
        self.train_init_op = tf.variables_initializer(train_init_variables)

    self.saver = tf.train.Saver()
    self.session = tf.Session()
    self.run(self.init_init_op)