Example #1
def disp_model_info():
    with tf.Graph().as_default():
        # Dummy placeholders for arbitrary number of 1d inputs and outputs

        inputs = tf.placeholder(tf.float32, shape=(None, 1))
        outputs = tf.placeholder(tf.float32, shape=(None, 1))

        # Build model
        predictions, end_points = regression_model(inputs)

        # Print name and shape of each tensor.
        print("Layers")
        for k, v in end_points.items():
            print('name = {}, shape = {}'.format(v.name, v.get_shape()))

        # Print name and shape of parameter nodes  (values not yet initialized)
        print("\n")
        print("Parameters")
        for v in slim.get_model_variables():
            print('name = {}, shape = {}'.format(v.name, v.get_shape()))

        print("\n")
        print("Local Parameters")
        for v in slim.get_local_variables():
            print('name = {}, shape = {}'.format(v.name, v.get_shape()))
    return
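This snippet assumes a slim-style `regression_model` is already in scope. A minimal sketch of a compatible model, assuming TF 1.x with contrib slim (the layer sizes and scope names here are illustrative, not from the original source):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def regression_model(inputs, scope='regression'):
    # Toy 1-D regression net; returns (predictions, end_points) as the
    # display function above expects.
    end_points = {}
    with tf.variable_scope(scope):
        net = slim.fully_connected(inputs, 32, scope='fc1')
        end_points['fc1'] = net
        predictions = slim.fully_connected(net, 1, activation_fn=None,
                                           scope='prediction')
        end_points['prediction'] = predictions
    return predictions, end_points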
Example #2
    def metric_def(self):
        self.training_metrics = {
            "learning_rate": self.learning_rate,
            "adam_beta_1": self.adam_beta_1,
            "adam_beta_2": self.adam_beta_2,
            "ema_decay": self.ema_decay,
            "cons_coefficient": self.cons_coefficient,
            "train/error/1": self.mean_error_1,
            "train/error/ema": self.mean_error_ema,
            "train/class_cost/1": self.mean_class_cost_1,
            "train/class_cost/ema": self.mean_class_cost_ema,
            "train/cons_cost/mt": self.mean_cons_cost_mt,
            "train/total_cost/mt": self.mean_total_cost_mt,
        }

        with tf.variable_scope("validation_metrics") as metrics_scope:
            self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map(
                {
                    "eval/error/1": streaming_mean(self.errors_1),
                    "eval/error/ema": streaming_mean(self.errors_ema),
                    "eval/class_cost/1": streaming_mean(self.class_costs_1),
                    "eval/class_cost/ema":
                    streaming_mean(self.class_costs_ema),
                })
            metric_variables = slim.get_local_variables(
                scope=metrics_scope.name)
            self.metric_init_op = tf.variables_initializer(metric_variables)

        self.result_formatter = string_utils.DictFormatter(
            order=[
                "eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"
            ],
            default_format='{name}: {value:>10.6f}',
            separator=",  ")
        self.result_formatter.add_format('error', '{name}: {value:>6.1%}')
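`streaming_mean` is not defined in this snippet. Since `metrics.aggregate_metric_map` expects each dictionary entry to be a `(value_op, update_op)` pair, a plausible sketch is a thin wrapper over `tf.metrics.mean` (an assumption, not the project's verbatim helper):

def streaming_mean(values):
    # tf.metrics.mean returns (mean_value, update_op), which is exactly
    # the pair that aggregate_metric_map expects.
    return tf.metrics.mean(values)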
Example #3
    def __init__(self, run_context=None):
        if run_context is not None:
            self.training_log = run_context.create_train_log('training')
            self.validation_log = run_context.create_train_log('validation')
            self.checkpoint_path = os.path.join(run_context.transient_dir,
                                                'checkpoint')
            self.tensorboard_path = os.path.join(run_context.result_dir,
                                                 'tensorboard')

        with tf.name_scope("placeholders"):
            self.images = tf.placeholder(dtype=tf.float32,
                                         shape=(None, 32, 32, 3),
                                         name='images')
            self.labels = tf.placeholder(dtype=tf.int32,
                                         shape=(None, ),
                                         name='labels')
            self.is_training = tf.placeholder(dtype=tf.bool,
                                              shape=(),
                                              name='is_training')

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        tf.add_to_collection("init_in_init", self.global_step)
        self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
        for var in self.hyper.variables.values():
            tf.add_to_collection("init_in_init", var)

        with tf.name_scope("ramps"):
            sigmoid_rampup_value = sigmoid_rampup(self.global_step,
                                                  self.hyper['rampup_length'])
            sigmoid_rampdown_value = sigmoid_rampdown(
                self.global_step, self.hyper['rampdown_length'],
                self.hyper['training_length'])
            self.learning_rate = tf.multiply(sigmoid_rampup_value *
                                             sigmoid_rampdown_value,
                                             self.hyper['max_learning_rate'],
                                             name='learning_rate')
            self.adam_beta_1 = tf.add(
                sigmoid_rampdown_value *
                self.hyper['adam_beta_1_before_rampdown'],
                (1 - sigmoid_rampdown_value) *
                self.hyper['adam_beta_1_after_rampdown'],
                name='adam_beta_1')
            self.cons_coefficient = tf.multiply(
                sigmoid_rampup_value,
                self.hyper['max_consistency_cost'],
                name='consistency_coefficient')

            step_rampup_value = step_rampup(self.global_step,
                                            self.hyper['rampup_length'])
            self.adam_beta_2 = tf.add(
                (1 - step_rampup_value) *
                self.hyper['adam_beta_2_during_rampup'],
                step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                name='adam_beta_2')
            self.ema_decay = tf.add(
                (1 - step_rampup_value) *
                self.hyper['ema_decay_during_rampup'],
                step_rampup_value * self.hyper['ema_decay_after_rampup'],
                name='ema_decay')

        ((self.class_logits_1, self.cons_logits_1),
         (self.class_logits_2, self.cons_logits_2),
         (self.class_logits_ema, self.cons_logits_ema)) = inference(
             self.images,
             is_training=self.is_training,
             ema_decay=self.ema_decay,
             input_noise=self.hyper['input_noise'],
             student_dropout_probability=self.hyper['student_dropout_probability'],
             teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
             normalize_input=self.hyper['normalize_input'],
             flip_horizontally=self.hyper['flip_horizontally'],
             translate=self.hyper['translate'],
             num_logits=self.hyper['num_logits'])

        with tf.name_scope("objectives"):
            self.mean_error_1, self.errors_1 = errors(self.class_logits_1,
                                                      self.labels)
            self.mean_error_ema, self.errors_ema = errors(
                self.class_logits_ema, self.labels)

            self.mean_class_cost_1, self.class_costs_1 = classification_costs(
                self.class_logits_1, self.labels)
            self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
                self.class_logits_ema, self.labels)

            labeled_consistency = self.hyper['apply_consistency_to_labeled']
            consistency_mask = tf.logical_or(tf.equal(self.labels, -1),
                                             labeled_consistency)
            self.mean_cons_cost_pi, self.cons_costs_pi = consistency_costs(
                self.cons_logits_1, self.class_logits_2, self.cons_coefficient,
                consistency_mask, self.hyper['consistency_trust'])
            self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
                self.cons_logits_1, self.class_logits_ema,
                self.cons_coefficient, consistency_mask,
                self.hyper['consistency_trust'])

            def l2_norms(matrix):
                l2s = tf.reduce_sum(matrix**2, axis=1)
                mean_l2 = tf.reduce_mean(l2s)
                return mean_l2, l2s

            self.mean_res_l2_1, self.res_l2s_1 = l2_norms(self.class_logits_1 -
                                                          self.cons_logits_1)
            self.mean_res_l2_ema, self.res_l2s_ema = l2_norms(
                self.class_logits_ema - self.cons_logits_ema)
            self.res_costs_1 = self.hyper['logit_distance_cost'] * self.res_l2s_1
            self.mean_res_cost_1 = tf.reduce_mean(self.res_costs_1)
            self.res_costs_ema = self.hyper['logit_distance_cost'] * self.res_l2s_ema
            self.mean_res_cost_ema = tf.reduce_mean(self.res_costs_ema)

            self.mean_total_cost_pi, self.total_costs_pi = total_costs(
                self.class_costs_1, self.cons_costs_pi, self.res_costs_1)
            self.mean_total_cost_mt, self.total_costs_mt = total_costs(
                self.class_costs_1, self.cons_costs_mt, self.res_costs_1)
            assert_shape(self.total_costs_pi, [3])
            assert_shape(self.total_costs_mt, [3])

            self.cost_to_be_minimized = tf.cond(
                self.hyper['ema_consistency'], lambda: self.mean_total_cost_mt,
                lambda: self.mean_total_cost_pi)

        with tf.name_scope("train_step"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step_op = nn.adam_optimizer(
                    self.cost_to_be_minimized,
                    self.global_step,
                    learning_rate=self.learning_rate,
                    beta1=self.adam_beta_1,
                    beta2=self.adam_beta_2,
                    epsilon=self.hyper['adam_epsilon'])

        self.training_control = training_control(self.global_step,
                                                 self.hyper['print_span'],
                                                 self.hyper['evaluation_span'],
                                                 self.hyper['training_length'])

        self.training_metrics = {
            "learning_rate": self.learning_rate,
            "adam_beta_1": self.adam_beta_1,
            "adam_beta_2": self.adam_beta_2,
            "ema_decay": self.ema_decay,
            "cons_coefficient": self.cons_coefficient,
            "train/error/1": self.mean_error_1,
            "train/error/ema": self.mean_error_ema,
            "train/class_cost/1": self.mean_class_cost_1,
            "train/class_cost/ema": self.mean_class_cost_ema,
            "train/cons_cost/pi": self.mean_cons_cost_pi,
            "train/cons_cost/mt": self.mean_cons_cost_mt,
            "train/res_cost/1": self.mean_res_cost_1,
            "train/res_cost/ema": self.mean_res_cost_ema,
            "train/total_cost/pi": self.mean_total_cost_pi,
            "train/total_cost/mt": self.mean_total_cost_mt,
        }

        with tf.variable_scope("validation_metrics") as metrics_scope:
            self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map(
                {
                    "eval/error/1": streaming_mean(self.errors_1),
                    "eval/error/ema": streaming_mean(self.errors_ema),
                    "eval/class_cost/1": streaming_mean(self.class_costs_1),
                    "eval/class_cost/ema":
                    streaming_mean(self.class_costs_ema),
                    "eval/res_cost/1": streaming_mean(self.res_costs_1),
                    "eval/res_cost/ema": streaming_mean(self.res_costs_ema),
                })
            metric_variables = slim.get_local_variables(
                scope=metrics_scope.name)
            self.metric_init_op = tf.variables_initializer(metric_variables)

        self.result_formatter = string_utils.DictFormatter(
            order=[
                "eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"
            ],
            default_format='{name}: {value:>10.6f}',
            separator=",  ")
        self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

        with tf.name_scope("initializers"):
            init_init_variables = tf.get_collection("init_in_init")
            train_init_variables = [
                var for var in tf.global_variables()
                if var not in init_init_variables
            ]
            self.init_init_op = tf.variables_initializer(init_init_variables)
            self.train_init_op = tf.variables_initializer(train_init_variables)

        self.saver = tf.train.Saver()
        self.session = tf.Session()
        self.run(self.init_init_op)
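The `sigmoid_rampup`, `sigmoid_rampdown`, and `step_rampup` helpers used in the "ramps" scope are not shown. A NumPy sketch of the standard Mean Teacher schedules, assuming the constants from the paper (the original code implements these as TensorFlow ops):

import numpy as np

def sigmoid_rampup_np(step, rampup_length):
    # Exponential ramp-up from 0 to 1 over the first rampup_length steps.
    if rampup_length == 0:
        return 1.0
    phase = 1.0 - np.clip(step / rampup_length, 0.0, 1.0)
    return float(np.exp(-5.0 * phase * phase))

def sigmoid_rampdown_np(step, rampdown_length, training_length):
    # Mirror-image ramp-down over the last rampdown_length steps.
    if rampdown_length == 0:
        return 1.0
    phase = 1.0 - np.clip((training_length - step) / rampdown_length, 0.0, 1.0)
    return float(np.exp(-12.5 * phase * phase))

def step_rampup_np(step, rampup_length):
    # Hard switch: 0 during ramp-up, 1 afterwards.
    return 1.0 if step >= rampup_length else 0.0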
Example #4
    def __init__(self, run_context=None, hyper_dict=None):

        # Initialize hyperparameters (avoid a mutable default argument)
        hyper_dict = hyper_dict or {}
        for key in hyper_dict:
            assert key in self.hyper, "Unknown hyperparameter '{}'!".format(key)
            self.hyper[key] = hyper_dict[key]

        # Initialize the background-noise input
        if self.hyper['bg_noise']:
            self.bg_noise_input = tf.convert_to_tensor(self.hyper['bg_noise_input'], dtype=tf.float32)
        else:
            self.bg_noise_input = tf.convert_to_tensor(np.zeros((32, 32)), dtype=tf.float32)

        # Initialize the model
        print('{} is initialized!'.format(self.hyper['cnn']))
        self.cnn = getattr(model, self.hyper['cnn'])

        if run_context is not None:
            self.training_log = run_context.create_train_log('training')
            self.validation_log = run_context.create_train_log('validation')
            self.checkpoint_path = os.path.join(run_context.transient_dir, 'checkpoint')
            self.tensorboard_path = os.path.join(run_context.result_dir, 'tensorboard')

        with tf.name_scope("placeholders"):
            self.images = tf.placeholder(dtype=tf.float32, shape=(None,) + self.hyper['input_dim'], name='images')
            self.labels = tf.placeholder(dtype=tf.int32, shape=(None,) + self.hyper['label_dim'], name='labels')
            self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        tf.add_to_collection("init_in_init", self.global_step)

        with tf.name_scope("ramps"):
            (self.learning_rate, self.cons_coefficient,
             self.adam_beta_1, self.adam_beta_2,
             self.ema_decay) = ramp_value(self.global_step, self.hyper)


        (self.class_logits_1,
         self.class_logits_ema) = self.inference(
            self.images,
            is_training=self.is_training,
            ema_decay=self.ema_decay,
            input_noise=self.hyper['input_noise'],
            student_dropout_probability=self.hyper['student_dropout_probability'],
            teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
            normalize_input=self.hyper['normalize_input'],
            flip_horizontally=self.hyper['flip_horizontally'],
            translate=self.hyper['translate'])

        with tf.name_scope("objectives"):
            self.mean_error_1, self.errors_1 = errors(self.class_logits_1, self.labels, sig=self.hyper['sig'])
            self.mean_error_ema, self.errors_ema = errors(self.class_logits_ema, self.labels, sig=self.hyper['sig'])

            self.mean_class_cost_1, self.class_costs_1 = classification_costs(
                self.class_logits_1, self.labels, sig=self.hyper['sig'])
            self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
                self.class_logits_ema, self.labels, sig=self.hyper['sig'])

            labeled_consistency = self.hyper['apply_consistency_to_labeled']
            consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
            self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
                self.class_logits_1, self.class_logits_ema, self.cons_coefficient,
                consistency_mask, self.hyper['consistency_trust'], sig='softmax')

            self.mean_total_cost_mt, self.total_costs_mt = total_costs(
                self.class_costs_1, self.cons_costs_mt)

            self.cost_to_be_minimized = self.mean_total_cost_mt


        with tf.name_scope("train_step"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                if self.hyper['optimizer'] == 'adam':
                    self.train_step_op = nn.adam_optimizer(self.cost_to_be_minimized,
                                                           self.global_step,
                                                           learning_rate=self.learning_rate,
                                                           beta1=self.adam_beta_1,
                                                           beta2=self.adam_beta_2,
                                                           epsilon=self.hyper['adam_epsilon'])
                elif self.hyper['optimizer'] == 'sgd':
                    self.train_step_op = nn.sgd_optimizer(self.cost_to_be_minimized,
                                                          self.global_step,
                                                          learning_rate=self.hyper['max_learning_rate'])
                else:
                    raise ValueError("Unknown optimizer '{}'!".format(self.hyper['optimizer']))


        self.training_metrics = {
            "learning_rate": self.learning_rate,
            "adam_beta_1": self.adam_beta_1,
            "adam_beta_2": self.adam_beta_2,
            "ema_decay": self.ema_decay,
            "cons_coefficient": self.cons_coefficient,
            "train/error/1": self.mean_error_1,
            "train/error/ema": self.mean_error_ema,
            "train/class_cost/1": self.mean_class_cost_1,
            "train/class_cost/ema": self.mean_class_cost_ema,
            "train/cons_cost/mt": self.mean_cons_cost_mt,
            "train/total_cost/mt": self.mean_total_cost_mt,
        }

        with tf.variable_scope("validation_metrics") as metrics_scope:
            self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
                "eval/error/1": streaming_mean(self.errors_1),
                "eval/error/ema": streaming_mean(self.errors_ema),
                "eval/class_cost/1": streaming_mean(self.class_costs_1),
                "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
            })
            metric_variables = slim.get_local_variables(scope=metrics_scope.name)
            self.metric_init_op = tf.variables_initializer(metric_variables)

        self.result_formatter = string_utils.DictFormatter(
            order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
            default_format='{name}: {value:>10.6f}',
            separator=",  ")
        self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

        with tf.name_scope("initializers"):
            init_init_variables = tf.get_collection("init_in_init")
            train_init_variables = [
                var for var in tf.global_variables() if var not in init_init_variables
            ]
            self.init_init_op = tf.variables_initializer(init_init_variables)
            self.train_init_op = tf.variables_initializer(train_init_variables)

        self.saver = tf.train.Saver()
        
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.session = tf.Session(config=config)
        self.run(self.init_init_op)
        self.save_tensorboard_graph()
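A hypothetical construction call for this variant, overriding two of the hyperparameters used above (the class name `Model` and the value 0.05 are placeholders; the keys appear in the snippet):

model = Model(run_context=None,
              hyper_dict={'optimizer': 'sgd',
                          'max_learning_rate': 0.05})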
Example #5
    def __init__(self, result_dir):
        self.checkpoint_dir = os.path.join(result_dir, 'checkpoints')
        self.summary_dir = os.path.join(result_dir, 'summaries')
        os.makedirs(self.checkpoint_dir)
        os.makedirs(self.summary_dir)

        with tf.name_scope("placeholders"):
            self.images = tf.placeholder(dtype=tf.float32,
                                         shape=(None, 32, 32, 3),
                                         name='images')
            self.labels = tf.placeholder(dtype=tf.int32,
                                         shape=(None, ),
                                         name='labels')
            self.is_training = tf.placeholder(dtype=tf.bool,
                                              shape=(),
                                              name='is_training')

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        tf.add_to_collection("init_in_init", self.global_step)
        self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
        for var in self.hyper.variables.values():
            tf.add_to_collection("init_in_init", var)

        with tf.name_scope("ramps"):
            sigmoid_rampup_value = sigmoid_rampup(self.global_step,
                                                  self.hyper['rampup_length'])
            sigmoid_rampdown_value = sigmoid_rampdown(
                self.global_step, self.hyper['rampdown_length'],
                self.hyper['training_length'])
            self.learning_rate = tf.multiply(sigmoid_rampup_value *
                                             sigmoid_rampdown_value,
                                             self.hyper['max_learning_rate'],
                                             name='learning_rate')
            self.adam_beta_1 = tf.add(
                sigmoid_rampdown_value *
                self.hyper['adam_beta_1_before_rampdown'],
                (1 - sigmoid_rampdown_value) *
                self.hyper['adam_beta_1_after_rampdown'],
                name='adam_beta_1')
            self.cons_coefficient = tf.multiply(
                sigmoid_rampup_value,
                self.hyper['max_consistency_coefficient'],
                name='consistency_coefficient')

            step_rampup_value = step_rampup(self.global_step,
                                            self.hyper['rampup_length'])
            self.adam_beta_2 = tf.add(
                (1 - step_rampup_value) *
                self.hyper['adam_beta_2_during_rampup'],
                step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                name='adam_beta_2')
            self.ema_decay = tf.add(
                (1 - step_rampup_value) *
                self.hyper['ema_decay_during_rampup'],
                step_rampup_value * self.hyper['ema_decay_after_rampup'],
                name='ema_decay')

        self.logits_1, self.logits_2, self.logits_ema = inference(
            self.images,
            is_training=self.is_training,
            ema_decay=self.ema_decay,
            normalize_input=self.hyper['normalize_input'],
            flip_horizontally=self.hyper['flip_horizontally'])

        with tf.name_scope("objectives"):
            self.mean_error_1, self.errors_1 = errors(self.logits_1,
                                                      self.labels)
            self.mean_error_ema, self.errors_ema = errors(
                self.logits_ema, self.labels)

            self.mean_class_cost_1, self.class_costs_1 = classification_costs(
                self.logits_1, self.labels)
            self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
                self.logits_ema, self.labels)

            labeled_consistency = self.hyper['apply_consistency_to_labeled']
            consistency_mask = tf.logical_or(tf.equal(self.labels, -1),
                                             labeled_consistency)
            self.mean_cons_cost_pi, self.cons_costs_pi = consistency_costs(
                self.logits_1, self.logits_2, self.cons_coefficient,
                consistency_mask)
            self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
                self.logits_1, self.logits_ema, self.cons_coefficient,
                consistency_mask)

            self.mean_total_cost_pi, self.total_costs_pi = total_costs(
                self.class_costs_1, self.cons_costs_pi)
            self.mean_total_cost_mt, self.total_costs_mt = total_costs(
                self.class_costs_1, self.cons_costs_mt)

            self.cost_to_be_minimized = tf.cond(
                self.hyper['ema_consistency'], lambda: self.mean_total_cost_mt,
                lambda: self.mean_total_cost_pi)

        with tf.name_scope("train_step"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step_op = nn.adam_optimizer(
                    self.cost_to_be_minimized,
                    self.global_step,
                    learning_rate=self.learning_rate,
                    beta1=self.adam_beta_1,
                    beta2=self.adam_beta_2,
                    epsilon=self.hyper['adam_epsilon'])

        self.training_control = training_control(self.global_step,
                                                 self.hyper['print_span'],
                                                 self.hyper['evaluation_span'],
                                                 self.hyper['training_length'])

        self.training_metrics = {
            "learning_rate": self.learning_rate,
            "adam_beta_1": self.adam_beta_1,
            "adam_beta_2": self.adam_beta_2,
            "ema_decay": self.ema_decay,
            "cons_coefficient": self.cons_coefficient,
            "train/error/1": self.mean_error_1,
            "train/error/ema": self.mean_error_ema,
            "train/class_cost/1": self.mean_class_cost_1,
            "train/class_cost/ema": self.mean_class_cost_ema,
            "train/cons_cost/pi": self.mean_cons_cost_pi,
            "train/cons_cost/mt": self.mean_cons_cost_mt,
            "train/total_cost/pi": self.mean_total_cost_pi,
            "train/total_cost/mt": self.mean_total_cost_mt,
        }

        with tf.variable_scope("validation_metrics") as metrics_scope:
            self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map(
                {
                    "eval/error/1": streaming_mean(self.errors_1),
                    "eval/error/ema": streaming_mean(self.errors_ema),

                    # Note that the evaluation costs are not directly comparable
                    # to the training costs. Training batches contain unlabeled
                    # samples but the evaluation batches do not. Because
                    # classification cost is zero for unlabeled samples, the
                    # training costs are smaller than evaluation costs when
                    # doing semi-supervised learning.
                    "eval/class_cost/1": streaming_mean(self.class_costs_1),
                    "eval/class_cost/ema":
                    streaming_mean(self.class_costs_ema),
                    "eval/cons_cost/pi": streaming_mean(self.cons_costs_pi),
                    "eval/cons_cost/mt": streaming_mean(self.cons_costs_mt),
                    "eval/total_cost/pi": streaming_mean(self.total_costs_pi),
                    "eval/total_cost/mt": streaming_mean(self.total_costs_mt)
                })
            metric_variables = slim.get_local_variables(
                scope=metrics_scope.name)
            self.metric_init_op = tf.variables_initializer(metric_variables)

        with tf.name_scope("initializers"):
            init_init_variables = tf.get_collection("init_in_init")
            train_init_variables = [
                var for var in tf.global_variables()
                if var not in init_init_variables
            ]
            self.init_init_op = tf.variables_initializer(init_init_variables)
            self.train_init_op = tf.variables_initializer(train_init_variables)

        self.saver = tf.train.Saver()
        self.session = tf.Session()
        self.run(self.init_init_op)
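`total_costs` is not shown. Consistent with its use on per-sample cost vectors above, and with the `assert_shape(..., [3])` and `assert_shape(..., [2])` checks in the three- and two-cost variants elsewhere on this page, a plausible sketch returns the scalar sum of the per-cost means along with the vector of those means (an assumption, not the repository's verbatim code):

def total_costs(*all_costs):
    # Each argument is a per-sample cost vector of shape [batch_size].
    stacked = tf.stack(all_costs)                # shape [n_costs, batch_size]
    mean_costs = tf.reduce_mean(stacked, axis=1) # shape [n_costs]
    total_cost = tf.reduce_sum(mean_costs)       # scalar
    return total_cost, mean_costs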
Example #6
    def __init__(
        self,
        config,
        output_dir="./output",
        use_rnn=False,
        testing=False,
        use_best=False,
    ):
        self.config = config
        self.output_dir = output_dir
        self.checkpoint_path = os.path.join(self.output_dir, "checkpoint")
        self.best_ckpt_path = os.path.join(self.output_dir, "best_ckpt")
        self.weights_path = os.path.join(self.output_dir, "weights")
        self.log_dir = os.path.join(self.output_dir, "log")
        self.use_rnn = use_rnn

        # Placeholder
        with tf.variable_scope("placeholders") as scope:
            self.signals = tf.placeholder(dtype=tf.float32,
                                          shape=(None,
                                                 self.config["input_size"], 1,
                                                 1),
                                          name='signals')
            self.labels = tf.placeholder(dtype=tf.int32,
                                         shape=(None, ),
                                         name='labels')
            self.is_training = tf.placeholder(dtype=tf.bool,
                                              shape=(),
                                              name='is_training')

            if self.use_rnn:
                self.loss_weights = tf.placeholder(dtype=tf.float32,
                                                   shape=(None, ),
                                                   name='loss_weights')
                self.seq_lengths = tf.placeholder(dtype=tf.int32,
                                                  shape=(None, ),
                                                  name='seq_lengths')

        # Monitor global step update
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        # Monitor the number of epochs passed
        self.global_epoch = tf.Variable(0,
                                        trainable=False,
                                        name='global_epoch')

        # Build a network that receives inputs from placeholders
        net = self.build_cnn()

        if self.use_rnn:
            # Check that the RNN config is given
            if "n_rnn_layers" not in self.config:
                raise ValueError('"n_rnn_layers" must be in config when use_rnn=True')
            # Append the RNN if needed
            net = self.append_rnn(net)

        # Softmax linear
        net = nn.fc("softmax_linear", net, self.config["n_classes"], bias=0.0)

        # Outputs
        self.logits = net
        self.preds = tf.argmax(self.logits, axis=1)

        # Cross-entropy loss
        self.loss_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.labels, logits=self.logits, name="loss_ce_per_sample")

        with tf.name_scope("loss_ce_mean") as scope:
            if self.use_rnn:
                # Weight by sequence
                loss_w_seq = tf.multiply(self.loss_weights,
                                         self.loss_per_sample)

                # Weight by class
                sample_weights = tf.reduce_sum(
                    tf.multiply(
                        tf.one_hot(indices=self.labels,
                                   depth=self.config["n_classes"]),
                        np.asarray(self.config["class_weights"],
                                   dtype=np.float32)), 1)
                loss_w_class = tf.multiply(loss_w_seq, sample_weights)

                # Compute the average loss, scaled by the sequence length
                self.loss_ce = tf.reduce_sum(loss_w_class) / tf.reduce_sum(
                    self.loss_weights)
            else:
                self.loss_ce = tf.reduce_mean(self.loss_per_sample)

        # Regularization loss
        self.reg_losses = self.regularization_loss()

        # Total loss
        self.loss = self.loss_ce + self.reg_losses

        # Metrics (used to accumulate a metric over the outputs of minibatches)
        with tf.variable_scope("stream_metrics") as scope:
            self.metric_value_op, self.metric_update_op = contrib_metrics.aggregate_metric_map(
                {
                    "loss":
                    tf.metrics.mean(values=self.loss),
                    "accuracy":
                    tf.metrics.accuracy(labels=self.labels,
                                        predictions=self.preds),
                    "precision":
                    tf.metrics.precision(labels=self.labels,
                                         predictions=self.preds),
                    "recall":
                    tf.metrics.recall(labels=self.labels,
                                      predictions=self.preds),
                })
            # Manually create reset operations of local vars
            metric_vars = contrib_slim.get_local_variables(scope=scope.name)
            self.metric_init_op = tf.variables_initializer(metric_vars)

        # Training outputs
        self.train_outputs = {
            "global_step": self.global_step,
            "train/loss": self.loss,
            "train/preds": self.preds,
            "train/stream_metrics": self.metric_update_op,
        }
        if self.use_rnn:
            self.train_outputs.update({
                "train/init_state": self.init_state,
                "train/final_state": self.final_state,
            })

        # Test outputs
        self.test_outputs = {
            "global_step": self.global_step,
            "test/loss": self.loss,
            "test/preds": self.preds,
        }
        if self.use_rnn:
            self.test_outputs.update({
                "test/init_state": self.init_state,
                "test/final_state": self.final_state,
            })

        # TensorFlow session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        if not testing:
            self.train_writer = tf.summary.FileWriter(
                os.path.join(self.log_dir, "train"))
            self.train_writer.add_graph(self.sess.graph)
            logger.info("Saved tensorboard graph to {}".format(
                self.train_writer.get_logdir()))

        # Optimizer
        if not testing:
            # self.lr = tf.train.exponential_decay(
            #     learning_rate=self.config["learning_rate_decay"],
            #     global_step=self.global_step,
            #     decay_steps=self.config["decay_steps"],
            #     decay_rate=self.config["decay_rate"],
            #     staircase=False,
            #     name="learning_rate"
            # )
            self.lr = tf.constant(self.config["learning_rate"],
                                  dtype=tf.float32)
            with tf.variable_scope("optimizer") as scope:
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    # Pretraining
                    if not self.use_rnn:
                        self.train_step_op, self.grad_op = nn.adam_optimizer(
                            loss=self.loss,
                            training_variables=tf.trainable_variables(),
                            global_step=self.global_step,
                            # learning_rate=self.config["learning_rate"],
                            learning_rate=self.lr,
                            beta1=self.config["adam_beta_1"],
                            beta2=self.config["adam_beta_2"],
                            epsilon=self.config["adam_epsilon"],
                        )
                    # Fine-tuning
                    else:
                        # Use different learning rates for CNN and RNN
                        self.train_step_op, self.grad_op = nn.adam_optimizer_clip(
                            loss=self.loss,
                            training_variables=tf.trainable_variables(),
                            global_step=self.global_step,
                            # learning_rate=self.config["learning_rate"],
                            learning_rate=self.lr,
                            beta1=self.config["adam_beta_1"],
                            beta2=self.config["adam_beta_2"],
                            epsilon=self.config["adam_epsilon"],
                            clip_value=self.config["clip_grad_value"],
                        )

        # Initializer
        with tf.variable_scope("initializer") as scope:
            # tf.trainable_variables() or tf.global_variables()
            self.init_global_op = tf.variables_initializer(
                tf.global_variables())
            self.init_local_op = tf.variables_initializer(tf.local_variables())

        # Saver for storing variables
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        self.best_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        # Initialize variables
        self.run([self.init_global_op, self.init_local_op])

        # Restore variables (if possible)
        is_restore = False
        if use_best:
            if os.path.exists(self.best_ckpt_path):
                if os.path.isfile(
                        os.path.join(self.best_ckpt_path, "checkpoint")):
                    # Restore the last checkpoint
                    latest_checkpoint = tf.train.latest_checkpoint(
                        self.best_ckpt_path)
                    self.saver.restore(self.sess, latest_checkpoint)
                    logger.info("Best model restored from {}".format(
                        latest_checkpoint))
                    is_restore = True
        else:
            if os.path.exists(self.checkpoint_path):
                if os.path.isfile(
                        os.path.join(self.checkpoint_path, "checkpoint")):
                    # Restore the last checkpoint
                    latest_checkpoint = tf.train.latest_checkpoint(
                        self.checkpoint_path)
                    self.saver.restore(self.sess, latest_checkpoint)
                    logger.info(
                        "Model restored from {}".format(latest_checkpoint))
                    is_restore = True
        if not is_restore:
            logger.info("Model started from random weights")
Example #7
    def __init__(self, run_context=None):
        if run_context is not None:
            self.training_log = run_context.create_train_log('training')
            self.validation_log = run_context.create_train_log('validation')
            self.checkpoint_path = os.path.join(run_context.transient_dir, 'checkpoint')
            self.tensorboard_path = os.path.join(run_context.result_dir, 'tensorboard')

        with tf.name_scope("placeholders"):
            self.images = tf.placeholder(dtype=tf.float32, shape=(None, 32, 32, 3), name='images')
            self.labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
            self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        tf.add_to_collection("init_in_init", self.global_step)
        self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
        for var in self.hyper.variables.values():
            tf.add_to_collection("init_in_init", var)

        with tf.name_scope("ramps"):
            sigmoid_rampup_value = sigmoid_rampup(self.global_step, self.hyper['rampup_length'])
            sigmoid_rampdown_value = sigmoid_rampdown(self.global_step,
                                                      self.hyper['rampdown_length'],
                                                      self.hyper['training_length'])
            self.learning_rate = tf.multiply(sigmoid_rampup_value * sigmoid_rampdown_value,
                                             self.hyper['max_learning_rate'],
                                             name='learning_rate')
            self.adam_beta_1 = tf.add(sigmoid_rampdown_value * self.hyper['adam_beta_1_before_rampdown'],
                                      (1 - sigmoid_rampdown_value) * self.hyper['adam_beta_1_after_rampdown'],
                                      name='adam_beta_1')
            self.cons_coefficient = tf.multiply(sigmoid_rampup_value,
                                                self.hyper['max_consistency_cost'],
                                                name='consistency_coefficient')

            step_rampup_value = step_rampup(self.global_step, self.hyper['rampup_length'])
            self.adam_beta_2 = tf.add((1 - step_rampup_value) * self.hyper['adam_beta_2_during_rampup'],
                                      step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                                      name='adam_beta_2')
            self.ema_decay = tf.add((1 - step_rampup_value) * self.hyper['ema_decay_during_rampup'],
                                    step_rampup_value * self.hyper['ema_decay_after_rampup'],
                                    name='ema_decay')

        (
            (self.class_logits_1, self.cons_logits_1),
            (self.class_logits_2, self.cons_logits_2),
            (self.class_logits_ema, self.cons_logits_ema)
        ) = inference(
            self.images,
            is_training=self.is_training,
            ema_decay=self.ema_decay,
            input_noise=self.hyper['input_noise'],
            student_dropout_probability=self.hyper['student_dropout_probability'],
            teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
            normalize_input=self.hyper['normalize_input'],
            flip_horizontally=self.hyper['flip_horizontally'],
            translate=self.hyper['translate'],
            num_logits=self.hyper['num_logits'])

        with tf.name_scope("objectives"):
            self.mean_error_1, self.errors_1 = errors(self.class_logits_1, self.labels)
            self.mean_error_ema, self.errors_ema = errors(self.class_logits_ema, self.labels)

            self.mean_class_cost_1, self.class_costs_1 = classification_costs(
                self.class_logits_1, self.labels)
            self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
                self.class_logits_ema, self.labels)

            labeled_consistency = self.hyper['apply_consistency_to_labeled']
            consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
            self.mean_cons_cost_pi, self.cons_costs_pi = consistency_costs(
                self.cons_logits_1, self.class_logits_2, self.cons_coefficient, consistency_mask, self.hyper['consistency_trust'])
            self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
                self.cons_logits_1, self.class_logits_ema, self.cons_coefficient, consistency_mask, self.hyper['consistency_trust'])


            def l2_norms(matrix):
                l2s = tf.reduce_sum(matrix ** 2, axis=1)
                mean_l2 = tf.reduce_mean(l2s)
                return mean_l2, l2s

            self.mean_res_l2_1, self.res_l2s_1 = l2_norms(self.class_logits_1 - self.cons_logits_1)
            self.mean_res_l2_ema, self.res_l2s_ema = l2_norms(self.class_logits_ema - self.cons_logits_ema)
            self.res_costs_1 = self.hyper['logit_distance_cost'] * self.res_l2s_1
            self.mean_res_cost_1 = tf.reduce_mean(self.res_costs_1)
            self.res_costs_ema = self.hyper['logit_distance_cost'] * self.res_l2s_ema
            self.mean_res_cost_ema = tf.reduce_mean(self.res_costs_ema)

            self.mean_total_cost_pi, self.total_costs_pi = total_costs(
                self.class_costs_1, self.cons_costs_pi, self.res_costs_1)
            self.mean_total_cost_mt, self.total_costs_mt = total_costs(
                self.class_costs_1, self.cons_costs_mt, self.res_costs_1)
            assert_shape(self.total_costs_pi, [3])
            assert_shape(self.total_costs_mt, [3])

            self.cost_to_be_minimized = tf.cond(self.hyper['ema_consistency'],
                                                lambda: self.mean_total_cost_mt,
                                                lambda: self.mean_total_cost_pi)

        with tf.name_scope("train_step"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step_op = nn.adam_optimizer(self.cost_to_be_minimized,
                                                       self.global_step,
                                                       learning_rate=self.learning_rate,
                                                       beta1=self.adam_beta_1,
                                                       beta2=self.adam_beta_2,
                                                       epsilon=self.hyper['adam_epsilon'])

        self.training_control = training_control(self.global_step,
                                                 self.hyper['print_span'],
                                                 self.hyper['evaluation_span'],
                                                 self.hyper['training_length'])

        self.training_metrics = {
            "learning_rate": self.learning_rate,
            "adam_beta_1": self.adam_beta_1,
            "adam_beta_2": self.adam_beta_2,
            "ema_decay": self.ema_decay,
            "cons_coefficient": self.cons_coefficient,
            "train/error/1": self.mean_error_1,
            "train/error/ema": self.mean_error_ema,
            "train/class_cost/1": self.mean_class_cost_1,
            "train/class_cost/ema": self.mean_class_cost_ema,
            "train/cons_cost/pi": self.mean_cons_cost_pi,
            "train/cons_cost/mt": self.mean_cons_cost_mt,
            "train/res_cost/1": self.mean_res_cost_1,
            "train/res_cost/ema": self.mean_res_cost_ema,
            "train/total_cost/pi": self.mean_total_cost_pi,
            "train/total_cost/mt": self.mean_total_cost_mt,
        }

        with tf.variable_scope("validation_metrics") as metrics_scope:
            self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
                "eval/error/1": streaming_mean(self.errors_1),
                "eval/error/ema": streaming_mean(self.errors_ema),
                "eval/class_cost/1": streaming_mean(self.class_costs_1),
                "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
                "eval/res_cost/1": streaming_mean(self.res_costs_1),
                "eval/res_cost/ema": streaming_mean(self.res_costs_ema),
            })
            metric_variables = slim.get_local_variables(scope=metrics_scope.name)
            self.metric_init_op = tf.variables_initializer(metric_variables)

        self.result_formatter = string_utils.DictFormatter(
            order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
            default_format='{name}: {value:>10.6f}',
            separator=",  ")
        self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

        with tf.name_scope("initializers"):
            init_init_variables = tf.get_collection("init_in_init")
            train_init_variables = [
                var for var in tf.global_variables() if var not in init_init_variables
            ]
            self.init_init_op = tf.variables_initializer(init_init_variables)
            self.train_init_op = tf.variables_initializer(train_init_variables)

        self.saver = tf.train.Saver()
        self.session = tf.Session()
        self.run(self.init_init_op)
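`errors` returns a mean plus a per-sample vector, and the consistency mask above treats label -1 as "unlabeled". A sketch consistent with that convention, restricting the error metric to labeled samples (an assumption, not the repository's verbatim helper):

def errors(logits, labels):
    # Keep only labeled samples (label != -1) for the error metric.
    labeled = tf.not_equal(labels, -1)
    masked_logits = tf.boolean_mask(logits, labeled)
    masked_labels = tf.boolean_mask(labels, labeled)
    predictions = tf.argmax(masked_logits, axis=-1, output_type=tf.int32)
    per_sample = tf.cast(tf.not_equal(predictions, masked_labels), tf.float32)
    return tf.reduce_mean(per_sample), per_sample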
Example #8
    def __init__(self, run_context=None):
        self.name = "Tweet Data Class"
        if run_context is not None:
            self.training_log = run_context.create_train_log('training')
            self.validation_log = run_context.create_train_log('validation')
            self.checkpoint_path = os.path.join(run_context.transient_dir, 'checkpoint')
            self.tensorboard_path = os.path.join(run_context.result_dir, 'tensorboard')

        with tf.name_scope("placeholders"):
            self.tweets = tf.placeholder(dtype=tf.float32, shape=(None, 500), name='tweets')
            self.labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
            self.is_training = tf.placeholder(dtype=tf.bool, shape=(), name='is_training')

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        tf.add_to_collection("init_in_init", self.global_step)
        self.hyper = HyperparamVariables(self.DEFAULT_HYPERPARAMS)
        for var in self.hyper.variables.values():
            tf.add_to_collection("init_in_init", var)
        
        with tf.name_scope("ramps"):
            # Constant (no-ramp) alternatives were tried and left commented out
            # below; the sigmoid/step ramp schedules that follow are in use.
#             self.learning_rate = tf.constant(self.hyper['max_learning_rate'], dtype = tf.float32)
#             self.adam_beta_1 = tf.constant(self.hyper['adam_beta_1_after_rampdown'], dtype = tf.float32)
#             self.cons_coefficient = tf.constant(self.hyper['max_consistency_cost'], dtype = tf.float32)
#             self.adam_beta_2 = tf.constant(self.hyper['adam_beta_2_after_rampup'], dtype = tf.float32)
#             self.ema_decay = tf.constant(self.hyper['ema_decay_after_rampup'], dtype = tf.float32)
#             self.learning_rate =self.DEFAULT_HYPERPARAMS['max_learning_rate']
#             self.adam_beta_1 = self.DEFAULT_HYPERPARAMS['adam_beta_1_after_rampdown']
#             self.cons_coefficient = self.DEFAULT_HYPERPARAMS['max_consistency_cost']
#             self.adam_beta_2 = self.DEFAULT_HYPERPARAMS['adam_beta_2_after_rampup']
#             self.ema_decay = self.DEFAULT_HYPERPARAMS['ema_decay_after_rampup']
            sigmoid_rampup_value = sigmoid_rampup(self.global_step, self.hyper['rampup_length'])
            sigmoid_rampdown_value = sigmoid_rampdown(self.global_step,
                                                      self.hyper['rampdown_length'],
                                                      self.hyper['training_length'])
            self.learning_rate = tf.multiply(sigmoid_rampup_value * sigmoid_rampdown_value,
                                             self.hyper['max_learning_rate'],
                                             name='learning_rate')
            self.adam_beta_1 = tf.add(sigmoid_rampdown_value * self.hyper['adam_beta_1_before_rampdown'],
                                      (1 - sigmoid_rampdown_value) * self.hyper['adam_beta_1_after_rampdown'],
                                      name='adam_beta_1')
            self.cons_coefficient = tf.multiply(sigmoid_rampup_value,
                                                self.hyper['max_consistency_cost'],
                                                name='consistency_coefficient')

            step_rampup_value = step_rampup(self.global_step, self.hyper['rampup_length'])
            self.adam_beta_2 = tf.add((1 - step_rampup_value) * self.hyper['adam_beta_2_during_rampup'],
                                      step_rampup_value * self.hyper['adam_beta_2_after_rampup'],
                                      name='adam_beta_2')
            self.ema_decay = tf.add((1 - step_rampup_value) * self.hyper['ema_decay_during_rampup'],
                                    step_rampup_value * self.hyper['ema_decay_after_rampup'],
                                    name='ema_decay')
            
        
        # Inference builds the student and teacher towers and returns the
        # different logits for the two models.
        (
            (self.class_logits_1, self.cons_logits_1),
            (self.class_logits_2, self.cons_logits_2),
            (self.class_logits_ema, self.cons_logits_ema)
        ) = inference(
            self.tweets,
            is_training=self.is_training,
            ema_decay=self.ema_decay,
            input_noise=self.hyper['input_noise'],
            hidden_dims=self.DEFAULT_HYPERPARAMS['hidden_dims'],
            student_dropout_probability=self.hyper['student_dropout_probability'],
            teacher_dropout_probability=self.hyper['teacher_dropout_probability'],
            num_logits=self.hyper['num_logits'])
        
        with tf.name_scope("objectives"):
            # Errors are only calculated for labeled examples; for unlabeled
            # examples (label == -1) the error is undefined.
            self.mean_error_1, self.errors_1 = errors(self.class_logits_1, self.labels)
            self.mean_error_ema, self.errors_ema = errors(self.class_logits_ema, self.labels)
            # Classification costs: cost_1 is for the student model and
            # cost_ema is for the (EMA) teacher model.
            self.mean_class_cost_1, self.class_costs_1 = classification_costs(
                self.class_logits_1, self.labels)
            self.mean_class_cost_ema, self.class_costs_ema = classification_costs(
                self.class_logits_ema, self.labels)

            labeled_consistency = self.hyper['apply_consistency_to_labeled']
            consistency_mask = tf.logical_or(tf.equal(self.labels, -1), labeled_consistency)
            self.mean_cons_cost_mt, self.cons_costs_mt = consistency_costs(
                self.cons_logits_1, self.class_logits_ema,
                self.cons_coefficient, consistency_mask)


            def l2_norms(matrix):
                l2s = tf.reduce_sum(matrix ** 2, axis=1)
                mean_l2 = tf.reduce_mean(l2s)
                return mean_l2, l2s

            self.mean_res_l2_1, self.res_l2s_1 = l2_norms(self.class_logits_1 - self.cons_logits_1)
            self.mean_res_l2_ema, self.res_l2s_ema = l2_norms(self.class_logits_ema - self.cons_logits_ema)

            # The mean total cost is the quantity being optimized.
            self.mean_total_cost_mt, self.total_costs_mt = total_costs(
                self.class_costs_1, self.cons_costs_mt)
            assert_shape(self.total_costs_mt, [2])

            self.cost_to_be_minimized = self.mean_total_cost_mt

        with tf.name_scope("train_step"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step_op = adam_optimizer(self.cost_to_be_minimized,
                                                       self.global_step,
                                                       learning_rate=self.learning_rate,
                                                       beta1=self.adam_beta_1,
                                                       beta2=self.adam_beta_2,
                                                       epsilon=self.hyper['adam_epsilon'])

        # TODO do we really need this?
        self.training_control = training_control(self.global_step,
                                                 self.hyper['print_span'],
                                                 self.hyper['evaluation_span'],
                                                 self.hyper['training_length'])

        self.training_metrics = {
            # NOTE these would be constant if ramp-up/ramp-down were disabled
            "learning_rate": self.learning_rate,
            "adam_beta_1": self.adam_beta_1,
            "adam_beta_2": self.adam_beta_2,
            "ema_decay": self.ema_decay,
            "cons_coefficient": self.cons_coefficient,
            "train/error/1": self.mean_error_1,
            "train/error/ema": self.mean_error_ema,
            "train/class_cost/1": self.mean_class_cost_1,
            "train/class_cost/ema": self.mean_class_cost_ema,
            "train/cons_cost/mt": self.mean_cons_cost_mt,
            "train/total_cost/mt": self.mean_total_cost_mt,
        }

        # streaming_mean accumulates a running mean across evaluation minibatches
        with tf.variable_scope("validation_metrics") as metrics_scope:
            self.metric_values, self.metric_update_ops = metrics.aggregate_metric_map({
                "eval/error/1": streaming_mean(self.errors_1),
                "eval/error/ema": streaming_mean(self.errors_ema),
                "eval/class_cost/1": streaming_mean(self.class_costs_1),
                "eval/class_cost/ema": streaming_mean(self.class_costs_ema),
            })
            metric_variables = slim.get_local_variables(scope=metrics_scope.name)
            self.metric_init_op = tf.variables_initializer(metric_variables)

        # string_utils.DictFormatter formats the metric dictionary for logging
        self.result_formatter = string_utils.DictFormatter(
            order=["eval/error/ema", "error/1", "class_cost/1", "cons_cost/mt"],
            default_format='{name}: {value:>10.6f}',
            separator=",  ")
        self.result_formatter.add_format('error', '{name}: {value:>6.1%}')

        with tf.name_scope("initializers"):
            init_init_variables = tf.get_collection("init_in_init")
            train_init_variables = [
                var for var in tf.global_variables() if var not in init_init_variables
            ]
            self.init_init_op = tf.variables_initializer(init_init_variables)
            print("Train init variables:")
            for var in train_init_variables:
                print(var)
            self.train_init_op = tf.variables_initializer(train_init_variables)
        self.saver = tf.train.Saver()
        self.session = tf.Session()
        self.run(self.init_init_op)
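Finally, `classification_costs` must zero out the unlabeled samples, per the comment in Example #5 that classification cost is zero for unlabeled samples. A sketch under that assumption (not the repository's verbatim helper):

def classification_costs(logits, labels):
    # Unlabeled samples carry label -1 and contribute zero cost.
    applicable = tf.not_equal(labels, -1)
    safe_labels = tf.where(applicable, labels, tf.zeros_like(labels))
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=safe_labels, logits=logits)
    per_sample = tf.where(applicable, per_sample,
                          tf.zeros_like(per_sample))
    # Mean over the whole batch, so unlabeled samples dilute the cost
    # (which is why training costs run lower than evaluation costs).
    return tf.reduce_mean(per_sample), per_sample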