Exemple #1
0
    def test_build_losses(self):
        """Check that build_losses returns None once the graph has been built."""
        # Setup
        value_column = {'type': 'value', 'n': 5}
        category_column = {'type': 'category', 'n': 2}
        metadata = {
            'details': [value_column, category_column],
            'num_columns': 2,
        }
        instance = GraphBuilder(metadata)

        logits_shape = (10, 10)
        logits_real = np.zeros(logits_shape, dtype=np.float32)
        logits_fake = np.zeros(logits_shape, dtype=np.float32)
        extra_g = 1.0
        l2_norm = 0.001

        batch_size = 200
        inputs = [
            np.full((batch_size, 1), 0.0, dtype=np.float32),
            np.full((batch_size, 5), 1.0, dtype=np.float32),
            np.full((batch_size, 1), 0),
        ]

        with TowerContext('', is_training=False):
            # build_losses is exercised in the same tower context, right
            # after build_graph has run on the inputs.
            instance.build_graph(*inputs)

            # Run
            result = instance.build_losses(
                logits_real, logits_fake, extra_g, l2_norm)

        # Check
        assert result is None
Exemple #2
0
    def _build_gan_trainer(self, input, model):
        """Build the model graph and define the combined GAN training op.

        ``tower_func`` is set because this is a TowerTrainer, and only a
        TowerTrainer supports automatic graph creation for inference during
        training. When inference during training is not needed, ``tower_func``
        is unnecessary and calling ``model.build_graph`` directly is enough.

        Args:
            input: Object providing ``get_input_tensors()``.
            model: Object providing ``build_graph``, ``get_input_signature()``,
                ``get_optimizer()``, ``g_loss``/``g_vars`` and
                ``d_loss``/``d_vars``.
        """
        # Wrap the graph function and run it once inside a training tower.
        signature = model.get_input_signature()
        self.tower_func = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            input_tensors = input.get_input_tensors()
            self.tower_func(*input_tensors)
        optimizer = model.get_optimizer()

        # One training iteration: by default, one d_min runs after one g_min.
        # The discriminator op is created under a control dependency on the
        # generator op, and it is the op exposed as ``train_op``.
        with tf.name_scope('optimize'):
            generator_step = optimizer.minimize(
                model.g_loss, var_list=model.g_vars, name='g_op')
            with tf.control_dependencies([generator_step]):
                discriminator_step = optimizer.minimize(
                    model.d_loss, var_list=model.d_vars, name='d_op')
        self.train_op = discriminator_step
Exemple #3
0
    def test_discriminator(self):
        """Check the discriminator output tensor and a selection of graph nodes."""
        # Setup
        instance = GraphBuilder({}, num_dis_layers=1)
        vecs = [np.zeros((7, 10)), np.ones((7, 10))]

        # Run
        with TowerContext('', is_training=False):
            result = instance.discriminator(vecs)

        # Check - Output properties
        assert result.name == 'dis_fc_top/output:0'
        assert result.shape.as_list() == [7, 1]
        assert result.dtype == tf.float64

        graph = result.graph

        # Check - Nodes
        # Each row holds the positional arguments handed to
        # check_operation_nodes after the graph; the dtype is tf.float64 for
        # every node checked here.
        expected_nodes = [
            ('concat', 'ConcatV2', [7, 20], ['dis_fc0/fc/Reshape']),
            ('dis_fc0/fc/output', 'Identity', [7, 100],
             ['dis_fc0/fc_diversity/Reshape', 'dis_fc0/concat']),
            ('dis_fc0/fc_diversity/output', 'Identity', [7, 100],
             ['dis_fc0/Reshape']),
            ('dis_fc0/concat', 'ConcatV2', [7, 110],
             ['dis_fc0/bn/batchnorm/mul']),
        ]
        for node_name, node_type, node_shape, related_names in expected_nodes:
            self.check_operation_nodes(
                graph, node_name, node_type, tf.float64, node_shape,
                related_names)
Exemple #4
0
    def __init__(self, input, model):
        """Set up the input, build the training graph and define ``train_op``.

        Args:
            input (InputSource):
            model (GANModelDesc):
        """
        super(GANTrainer, self).__init__()
        assert isinstance(model, GANModelDesc), model
        signature = model.get_inputs_desc()

        # Setting up the input returns callbacks; register them on the trainer.
        self.register_callback(input.setup(signature))

        # tower_func is set because this is a TowerTrainer, and only a
        # TowerTrainer supports automatic graph creation for inference during
        # training. If inference during training does not matter, tower_func
        # is not needed and calling model.build_graph directly is OK.
        self.tower_func = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())
        optimizer = model.get_optimizer()

        # One training iteration: by default, run one d_min after one g_min.
        with tf.name_scope('optimize'):
            generator_step = optimizer.minimize(
                model.g_loss, var_list=model.g_vars, name='g_op')
            with tf.control_dependencies([generator_step]):
                discriminator_step = optimizer.minimize(
                    model.d_loss, var_list=model.d_vars, name='d_op')
        self.train_op = discriminator_step
Exemple #5
0
    def __init__(self, input, model, d_period=1, g_period=1):
        """Build the graph and create separate discriminator and generator ops.

        Args:
            d_period(int): period of each d_opt run
            g_period(int): period of each g_opt run
        """
        super(SeparateGANTrainer, self).__init__()
        self._d_period = int(d_period)
        self._g_period = int(g_period)
        assert min(d_period, g_period) == 1

        # Setup input
        self.register_callback(input.setup(model.get_inputs_desc()))

        # Build the graph. BatchNorm is configured with internal_update=True:
        # the updates should not be hooked to both train ops, as that would
        # hurt training speed.
        self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
        with TowerContext('', is_training=True):
            with argscope(BatchNorm, internal_update=True):
                self.tower_func(*input.get_input_tensors())

        # Warn when anything still ended up in the UPDATE_OPS collection.
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if pending_updates:
            logger.warn("Found {} ops in UPDATE_OPS collection!".format(len(pending_updates)))
            logger.warn("Using SeparateGANTrainer with UPDATE_OPS may hurt your training speed a lot!")

        optimizer = model.get_optimizer()
        with tf.name_scope('optimize'):
            self.d_min = optimizer.minimize(
                model.d_loss, var_list=model.d_vars, name='d_min')
            self.g_min = optimizer.minimize(
                model.g_loss, var_list=model.g_vars, name='g_min')
Exemple #6
0
    def __init__(self, input, model):
        """Build the GAN training graph, define ``train_op`` and register callbacks.

        Args:
            input: Input source; must provide ``setup`` and
                ``get_input_tensors``.
            model (GANModelDesc): The GAN model to train.
        """
        super(GANTrainer, self).__init__()
        assert isinstance(model, GANModelDesc), model
        signature = model.get_inputs_desc()
        callbacks = input.setup(signature)

        # A tower function is needed because this is a TowerTrainer, and only
        # a TowerTrainer supports automatic graph creation for inference
        # during training.
        build_tower = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            build_tower(*input.get_input_tensors())
        optimizer = model.get_optimizer()

        # By default, run one d_min after one g_min.
        with tf.name_scope('optimize'):
            generator_step = optimizer.minimize(
                model.g_loss, var_list=model.g_vars, name='g_op')
            with tf.control_dependencies([generator_step]):
                discriminator_step = optimizer.minimize(
                    model.d_loss, var_list=model.d_vars, name='d_op')
        self.train_op = discriminator_step
        self.set_tower_func(build_tower)

        # The input callbacks are registered one at a time, after the graph
        # and the train op exist.
        for callback in callbacks:
            self.register_callback(callback)
Exemple #7
0
    def __init__(self, input, model, d_period=1, g_period=1):
        """Build the graph and create separate discriminator and generator ops.

        Args:
            input: Input source; must provide ``setup`` and
                ``get_input_tensors``.
            model: Model providing ``build_graph``, ``get_inputs_desc``,
                ``get_optimizer`` and the ``d_*``/``g_*`` losses and variables.
            d_period: Stored as an int in ``_d_period``.
            g_period: Stored as an int in ``_g_period``.

        Raises:
            ValueError: If the smaller of ``d_period`` and ``g_period`` is
                not 1.
        """
        super(SeparateGANTrainer, self).__init__()
        self._d_period = int(d_period)
        self._g_period = int(g_period)
        if min(d_period, g_period) != 1:
            raise ValueError(
                'The minimum between d_period and g_period must be 1.')

        # Setup input
        self.register_callback(input.setup(model.get_inputs_desc()))

        # Build the graph
        self.tower_func = TowerFuncWrapper(
            model.build_graph, model.get_inputs_desc())
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())

        # The two ops are independent of each other: no control dependency
        # links the discriminator and generator steps.
        optimizer = model.get_optimizer()
        with tf.name_scope('optimize'):
            self.d_min = optimizer.minimize(
                model.d_loss, var_list=model.d_vars, name='d_min')
            self.g_min = optimizer.minimize(
                model.g_loss, var_list=model.g_vars, name='g_min')
Exemple #8
0
    def __init__(self, model, input_queue):
        """Build the graph and define a train op with clipped gradients.

        Args:
            model: Model providing ``build_graph``, ``get_inputs_desc``,
                ``get_optimizer`` and the ``g_*``/``d_*`` losses and variables.
            input_queue: Input source; must provide ``setup`` and
                ``get_input_tensors``.
        """
        super().__init__()
        signature = model.get_inputs_desc()

        # Setup input
        self.register_callback(input_queue.setup(signature))

        # Build the graph
        self.tower_func = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            self.tower_func(*input_queue.get_input_tensors())

        optimizer = model.get_optimizer()

        def clip(grads_and_vars):
            """Clip every gradient element-wise to the range [-5.0, 5.0]."""
            return [(tf.clip_by_value(grad, -5.0, 5.0), var)
                    for grad, var in grads_and_vars]

        # One training iteration: by default, run one d_min after one g_min.
        with tf.name_scope('optimize'):
            generator_grads = clip(
                optimizer.compute_gradients(model.g_loss,
                                            var_list=model.g_vars))
            generator_step = optimizer.apply_gradients(generator_grads,
                                                       name='g_op')
            with tf.control_dependencies([generator_step]):
                discriminator_grads = clip(
                    optimizer.compute_gradients(model.d_loss,
                                                var_list=model.d_vars))
                discriminator_step = optimizer.apply_gradients(
                    discriminator_grads, name='d_op')

        self.train_op = discriminator_step
Exemple #9
0
    def __init__(self, input, model, d_period=1, g_period=1):
        """Build the graph and create separate discriminator and generator ops.

        Args:
            d_period(int): period of each d_opt run
            g_period(int): period of each g_opt run
        """
        super(SeparateGANTrainer, self).__init__()
        self._d_period = int(d_period)
        self._g_period = int(g_period)
        assert min(d_period, g_period) == 1

        # Setup input
        self.register_callback(input.setup(model.get_inputs_desc()))

        # Build the graph
        self.tower_func = TowerFuncWrapper(
            model.build_graph, model.get_inputs_desc())
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())

        # Both ops come from the same optimizer and are kept as separate
        # attributes; neither depends on the other.
        optimizer = model.get_optimizer()
        with tf.name_scope('optimize'):
            self.d_min = optimizer.minimize(
                model.d_loss, var_list=model.d_vars, name='d_min')
            self.g_min = optimizer.minimize(
                model.g_loss, var_list=model.g_vars, name='g_min')
Exemple #10
0
    def __init__(self, config):
        """Build the inference graph, create a Saver and an initialized session.

        Args:
            config (PredictConfig): the config to use.
        """
        self._input_names = config.input_names
        self.graph = config._maybe_create_graph()
        with self.graph.as_default():
            # Build the tower on placeholders, in inference mode.
            placeholders = PlaceholderInput()
            placeholders.setup(config.input_signature)
            with TowerContext('', is_training=False):
                config.tower_func(*placeholders.get_input_tensors())

            input_tensors = get_tensors_by_names(config.input_names)
            output_tensors = get_tensors_by_names(config.output_names)

            config.session_init._setup_graph()
            self.saver = tf.train.Saver()

            # Global and local variables are initialized first; the configured
            # session initializer runs afterwards on the same session.
            initializers = [
                tf.global_variables_initializer(),
                tf.local_variables_initializer(),
            ]
            self.sess = config.session_creator.create_session()
            self.sess.run(initializers)
            config.session_init._run_init(self.sess)

            super(OfflinePredictorWithSaver, self).__init__(
                input_tensors, output_tensors, config.return_input, self.sess)
Exemple #11
0
 def forward(self, inputs, is_training=False):
     """Preprocess ``inputs`` and return the logits from ``get_logits``.

     Args:
         inputs: Passed to ``self.image_preprocess``.
         is_training: Passed positionally to ``TowerContext``.

     Returns:
         The value returned by ``self.get_logits``.
     """
     preprocessed = self.image_preprocess(inputs)
     layout = self.data_format
     assert layout in ['NCHW', 'NHWC']
     if layout == 'NCHW':
         # Permutation [0, 3, 1, 2] moves the last axis to position 1
         # (presumably NHWC -> NCHW; confirm against image_preprocess).
         image = tf.transpose(preprocessed, [0, 3, 1, 2])
     else:
         image = preprocessed
     with TowerContext('', is_training):
         return self.get_logits(image)
    def graph(self, x, y, i, x_max, x_min):
        """Apply one signed-step update to ``x`` and increment the counter.

        Args:
            x: Current input tensor; the loss gradient is taken w.r.t. it.
            y: Labels for the sparse softmax cross-entropy.
            i: Counter tensor, incremented by one.
            x_max: Upper clipping bound for the updated ``x``.
            x_min: Lower clipping bound for the updated ``x``.

        Returns:
            Tuple ``(x, y, i, x_max, x_min)`` with the updated ``x`` and ``i``.
        """
        with TowerContext("model_tower", is_training=False):
            logits, _, _ = network.model(x, FLAGS.attack_networks[0])

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=y)

        # With FLAGS.universal set, the noise fed to the RHP network is all
        # zeros; otherwise it is the gradient of the loss w.r.t. x.
        if FLAGS.universal:
            noise = tf.zeros_like(x)
        else:
            noise = tf.gradients(loss, x)[0]

        with TowerContext('RHP_tower', is_training=False):
            with tf.variable_scope('RHP'):
                noise = conv_with_rn(noise)

        # Divide by the mean absolute value over axes 1-3; the 1e-12 keeps
        # the denominator away from zero.
        mean_magnitude = tf.reduce_mean(
            tf.abs(noise), [1, 2, 3], keepdims=True)
        noise = noise / (mean_magnitude + 1e-12)

        stepped = x + self.step_size * tf.sign(noise)
        clipped = tf.clip_by_value(stepped, x_min, x_max)
        next_i = tf.add(i, 1)
        return clipped, y, next_i, x_max, x_min
Exemple #13
0
    def forward(self, inputs, is_training=False):
        """Parse the inputs and run the backbone, RPN head and ROI head.

        All results are stored as attributes on ``self``; nothing is returned.

        Args:
            inputs: Passed to ``self._parseInputs``.
            is_training: Passed to ``TowerContext`` and to both heads.
        """
        self._parseInputs(inputs)
        self.image = self.preprocess(self.X)  # 1CHW
        with TowerContext('', is_training):
            self.features = self._backbone()

            rpn_outputs = self._rpn_head(is_training)
            (self.rpn_box_logit,
             self.rpn_label_logit,
             self.proposals_boxes) = rpn_outputs

            roi_outputs = self._roi_head(is_training)
            self.roi_box_logit, self.roi_label_logit = roi_outputs
def build_imagenet_model(image, label, reuse=False, conf=1):
    """Build a depth-101 ResNeXtDenoiseAllModel in inference mode.

    Args:
        image: Forwarded to ``model.build_graph``.
        label: Forwarded to ``model.build_graph``.
        reuse: Forwarded to ``tf.variable_scope``.
        conf: Not used by this function.

    Returns:
        The ``logits`` attribute of the built model.
    """
    model_args = container()
    model_args.depth = 101
    # The variable scope has an empty name and no auxiliary name scope.
    with TowerContext(tower_name='', is_training=False), \
            tf.variable_scope("", auxiliary_name_scope=False, reuse=reuse):
        net = ResNeXtDenoiseAllModel(model_args)
        net.build_graph(image, label)
    return net.logits
Exemple #15
0
 def build_graph(self):
     """Create the input placeholders and evaluate every test network on them.

     For each name in ``FLAGS.test_networks`` the accuracy and predictions
     returned by ``network.model`` are appended to ``self.acc_list`` and
     ``self.predictions``.
     """
     image_shape = [None, 299, 299, 3]
     self.x_input = tf.placeholder(tf.float32, shape=image_shape)
     # image_shape[0] is None, so the label placeholder is created with
     # shape=None.
     self.y_input = tf.placeholder(tf.int64, shape=image_shape[0])
     self.acc_list = []
     self.predictions = []
     with TowerContext("model_tower", is_training=False):
         for name in FLAGS.test_networks:
             accuracy, predicted = network.model(
                 self.x_input, name, label=self.y_input)
             self.acc_list.append(accuracy)
             self.predictions.append(predicted)
Exemple #16
0
    def _build_trainer(self, input, model):
        """Build the model graph and define the single training op.

        Args:
            input: Object providing ``get_input_tensors()``.
            model: Object providing ``build_graph``, ``inputs()``,
                ``get_optimizer()``, ``loss`` and ``vars``.
        """
        # Build the graph
        self.tower_func = TowerFuncWrapper(model.build_graph, model.inputs())
        with TowerContext('', is_training=True):
            input_tensors = input.get_input_tensors()
            self.tower_func(*input_tensors)

        # Define the training iteration; the optimizer is obtained inside the
        # "optimize" name scope.
        with tf.name_scope("optimize"):
            optimizer = model.get_optimizer()
            self.train_op = optimizer.minimize(
                model.loss,
                var_list=model.vars,
                colocate_gradients_with_ops=True,
                name="op_min",
            )
Exemple #17
0
    def __init__(self, gan_input, a3c_input, gan_model, a3c_model):
        """Build a joint GAN + A3C graph and the per-model training ops.

        Args:
            gan_input (InputSource): input source for the GAN model.
            a3c_input (InputSource): input source for the A3C model.
            gan_model (GANModelDesc): the GAN model.
            a3c_model: model providing ``get_inputs_desc``, ``build_graph``,
                ``get_optimizer``, ``loss`` and ``vars``.
        """
        super(AGTrainer, self).__init__()
        assert isinstance(gan_model, GANModelDesc), gan_model
        gan_inputs_desc = gan_model.get_inputs_desc()
        a3c_inputs_desc = a3c_model.get_inputs_desc()

        # The callbacks of both input sources are concatenated and registered
        # in a single call.
        gan_callbacks = gan_input.setup(gan_inputs_desc)
        a3c_callbacks = a3c_input.setup(a3c_inputs_desc)
        self.register_callback(gan_callbacks + a3c_callbacks)

        # tower_func is set because this is a TowerTrainer, and only a
        # TowerTrainer supports automatic graph creation for inference during
        # training. If inference during training does not matter, tower_func
        # is not needed and calling model.build_graph directly is OK.

        def build_both(*tensors):
            """Build both graphs: leading tensors go to the GAN, the rest to A3C."""
            split = len(gan_inputs_desc)
            return [
                gan_model.build_graph(*tensors[:split]),
                a3c_model.build_graph(*tensors[split:]),
            ]

        # Build the graph
        self.tower_func = TowerFuncWrapper(
            build_both, gan_inputs_desc + a3c_inputs_desc)
        with TowerContext('', is_training=True):
            all_tensors = (gan_input.get_input_tensors() +
                           a3c_input.get_input_tensors())
            self.tower_func(*all_tensors)

        gan_optimizer = gan_model.get_optimizer()
        with tf.name_scope('gan_optimize'):
            self.d_min = gan_optimizer.minimize(
                gan_model.d_loss, var_list=gan_model.d_vars, name='d_min')
            self.g_min = gan_optimizer.minimize(
                gan_model.g_loss, var_list=gan_model.g_vars, name='g_min')

        a3c_optimizer = a3c_model.get_optimizer()
        with tf.name_scope('a3c_optimize'):
            self.a3c_min = a3c_optimizer.minimize(
                a3c_model.loss, var_list=a3c_model.vars, name='a3c_min')

        self.generator = self.get_predictor(['z'], ['gen/gen'])
        self.memory = GoalMemory(MEMORY_SIZE)
        self.env = WrappedEnv()

        # NOTE(review): this override is assigned to the instance, so it is
        # not bound and ``s`` must be passed explicitly by the caller; it also
        # calls ``s.super()``, which only works if the object defines a
        # ``super`` method. Confirm the intended calling convention.
        def reset(s, goal):
            s.super().reset()
            s.goal = goal

        self.env.reset = reset
def build_imagenet_model_old(image, label, reuse=False, conf=1):
    """Build a depth-101 ResNeXtDenoiseAllModel and collect metrics and losses.

    The model is built in inference mode (``is_training=False``) inside a
    variable scope with an empty name.

    Args:
        image: Forwarded to ``model.build_graph``.
        label: Forwarded to ``model.build_graph``; also used for the one-hot
            encodings (depth 1000) and the cross-entropy terms.
        reuse: Forwarded to ``tf.variable_scope``.
        conf: Margin added inside the ReLU of ``target_loss`` and
            ``target_loss5``.

    Returns:
        A new ``container`` instance carrying the attributes ``logits``,
        ``label``, ``acc_y``, ``acc_y_5``, ``accuracy``, ``rev_xent``,
        ``poss_loss``, ``target_loss5``, ``target_loss``, ``xent_filter``
        and ``xent``.
    """
    args = container()
    args.depth = 101
    with TowerContext(tower_name='', is_training=False):
        with tf.variable_scope("", auxiliary_name_scope=False, reuse=reuse):
            model = ResNeXtDenoiseAllModel(args)
            model.build_graph(image, label)

    # Instantiate the container. Binding the bare ``container`` name would
    # store every result on the shared class object, so successive calls
    # would overwrite each other's outputs.
    cont = container()
    cont.logits = model.logits
    cont.label = tf.argmax(cont.logits, axis=-1)
    cont.acc_y = 1 - model.wrong_1
    cont.acc_y_5 = 1 - model.wrong_5
    cont.accuracy = tf.reduce_mean(1 - model.wrong_1)  # wrong_5

    # rev_xent: mean of log(1 - p_label), where p_label is the softmax
    # probability of the given label.
    cont.rev_xent = tf.reduce_mean(
        tf.log(1 - tf.reduce_sum(tf.nn.softmax(model.logits) *
                                 tf.one_hot(label, depth=1000),
                                 axis=-1)))
    # poss_loss: 1 - mean softmax probability of the given label.
    cont.poss_loss = 1 - tf.reduce_mean(
        tf.reduce_sum(
            tf.nn.softmax(model.logits) * tf.one_hot(label, depth=1000),
            axis=-1))

    label_one_hot = tf.one_hot(label, depth=1000)
    # Subtracting 1e7 at the label position keeps the labelled class from
    # being selected by the max / top-k over the remaining logits.
    wrong_logit = tf.reduce_max(model.logits * (1 - label_one_hot) -
                                label_one_hot * 1e7,
                                axis=-1)
    true_logit = tf.reduce_sum(model.logits * label_one_hot, axis=-1)
    wrong_logit5, _ = tf.nn.top_k(model.logits * (1 - label_one_hot) -
                                  label_one_hot * 1e7,
                                  k=5,
                                  sorted=False)
    true_logit5 = tf.reduce_sum(model.logits * label_one_hot,
                                axis=-1,
                                keep_dims=True)
    cont.target_loss5 = -tf.reduce_sum(
        tf.nn.relu(true_logit5 - wrong_logit5 + conf), axis=1)
    cont.target_loss = -tf.nn.relu(true_logit - wrong_logit + conf)

    # xent_filter weights each cross-entropy term by (1 - wrong_1).
    cont.xent_filter = tf.reduce_mean(
        (1.0 - model.wrong_1) * tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label, logits=model.logits),
        axis=-1)

    cont.xent = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label, logits=model.logits),
                               axis=-1)
    return cont
Exemple #19
0
    def test_compute_kl(self):
        """Check compute_kl evaluates to zeros for the given real/pred arrays."""
        # Setup
        real = np.array([1.0, 1.0])
        pred = np.array([0.0, 1.0])
        expected_result = np.array([0.0, 0.0])

        # Run
        with self.test_session(), TowerContext('', is_training=False):
            kl_tensor = GraphBuilder.compute_kl(real, pred)
            result = kl_tensor.eval()

        # Check
        assert_equal(result, expected_result)
Exemple #20
0
    def test_build_graph(self):
        """Check that build_graph returns None for a single value column."""
        # Setup
        value_column = {'type': 'value', 'n': 5}
        instance = GraphBuilder({'details': [value_column]})

        batch_size = 50
        inputs = [
            np.full((batch_size, 5), 0.0, dtype=np.float32),
            np.full((batch_size, 1), 1.0, dtype=np.float32),
        ]

        # Run
        with TowerContext('', is_training=False):
            result = instance.build_graph(*inputs)

        # Check
        assert result is None
Exemple #21
0
    def test_batch_diversity(self):
        """Check the batch_diversity output tensor, graph nodes and value."""
        # Setup
        layer = tf.Variable(np.zeros(15))
        n_kernel = 20
        kernel_dim = 30

        expected_result = np.full((15, 20), 15.0)

        # Run
        result = GraphBuilder.batch_diversity(layer, n_kernel, kernel_dim)

        # Check - Output properties
        assert result.name == 'Sum_1:0'
        assert result.dtype == tf.float64
        assert result.shape.as_list() == [15, 20]

        graph = result.graph

        # Check - Nodes
        # Each row holds the positional arguments handed to
        # check_operation_nodes after the graph; the dtype is tf.float64 for
        # every node checked here.
        expected_nodes = [
            ('fc_diversity/output', 'Identity', [15, 600], ['Reshape']),
            ('Reshape', 'Reshape', [15, 20, 30], ['Reshape_1', 'Reshape_2']),
            ('Reshape_1', 'Reshape', [15, 1, 20, 30], ['sub']),
            ('Reshape_2', 'Reshape', [1, 15, 20, 30], ['sub']),
            ('sub', 'Sub', [15, 15, 20, 30], ['Abs']),
            ('Abs', 'Abs', [15, 15, 20, 30], ['Sum']),
            ('Sum', 'Sum', [15, 15, 20], ['Neg']),
            ('Neg', 'Neg', [15, 15, 20], ['Exp']),
            ('Exp', 'Exp', [15, 15, 20], ['Sum_1']),
            ('Sum_1', 'Sum', [15, 20], []),
        ]
        for node_name, node_type, node_shape, related_names in expected_nodes:
            self.check_operation_nodes(
                graph, node_name, node_type, tf.float64, node_shape,
                related_names)

        # Check - Value: variables are initialized in the test session before
        # the output tensor is evaluated.
        with self.test_session(), TowerContext('', is_training=False):
            tf.initialize_all_variables().run()
            result = result.eval()

        assert_equal(result, expected_result)
Exemple #22
0
    def _build_vde_trainer(self, input, model):
        """Build the model graph and define the single VDE training op.

        Args:
            input (InputSource):
            model (VDEModelDesc):
        """
        # Build the graph
        signature = model.get_input_signature()
        self.tower_func = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            input_tensors = input.get_input_tensors()
            self.tower_func(*input_tensors)
        optimizer = model.get_optimizer()

        # The total loss is minimized over the encode, predict and decode
        # variables, passed as one three-element list.
        with tf.name_scope('optimize'):
            trainable = [
                model.encode_vars,
                model.predict_vars,
                model.decode_vars,
            ]
            self.train_op = optimizer.minimize(
                model.total_loss, var_list=trainable, name='train_op')
Exemple #23
0
    def __init__(self, input, model):
        """Build the graph and define the discriminator and generator-side ops.

        Args:
            input: Input source; must provide ``setup`` and
                ``get_input_tensors``.
            model: Model providing ``build_graph``, ``get_inputs_desc``,
                ``get_optimizer``, the ``l_*`` losses, the ``d_vars``,
                ``g_vars`` and ``c_vars`` variable lists,
                ``d_uncertainty`` and ``d_uncertainty_threshold``.
        """
        super(S2BTrainer, self).__init__()
        # NOTE: the isinstance(model, GANModelDesc) check is disabled here.
        signature = model.get_inputs_desc()

        # Setup input: callbacks are registered one at a time.
        for callback in input.setup(signature):
            self.register_callback(callback)

        # tower_func is set because this is a TowerTrainer, and only a
        # TowerTrainer supports automatic graph creation for inference during
        # training. If inference during training does not matter, tower_func
        # is not needed and calling model.build_graph directly is OK.
        self.tower_func = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())
        optimizer = model.get_optimizer()

        # Define the training iteration
        with tf.name_scope('Optimize'):
            self.train_op_d = optimizer.minimize(
                model.l_gan_d, var_list=model.d_vars, name='Train_Op_d')

            # The generator ops are NOT placed under a control dependency on
            # the discriminator op. The four generator losses are minimized
            # separately over g_vars and then grouped into one op.
            generator_objectives = [
                (model.l_gan_g, 'Train_Op_gan_g'),
                (model.l_const, 'Train_Op_const'),
                (model.l_tid, 'Train_Op_tid'),
                (model.l_tv, 'Train_Op_tv'),
            ]
            generator_steps = [
                optimizer.minimize(loss, var_list=model.g_vars, name=op_name)
                for loss, op_name in generator_objectives
            ]
            train_op_g = tf.group(*generator_steps)

            # The l_c step covers c_vars + g_vars and is created under a
            # control dependency on the grouped generator op.
            with tf.control_dependencies([train_op_g]):
                self.train_op_c_g = optimizer.minimize(
                    model.l_c,
                    var_list=model.c_vars + model.g_vars,
                    name='Train_Op_c_g')

            self.d_uncertainty = model.d_uncertainty
            self.threshold = model.d_uncertainty_threshold
    def __init__(self, config):
        """Build the inference graph and an initialized session from ``config``.

        Args:
            config (PredictConfig): the config to use.
        """
        self.graph = config._maybe_create_graph()
        with self.graph.as_default():
            # Build the tower on placeholders, in inference mode.
            placeholders = PlaceholderInput()
            placeholders.setup(config.inputs_desc)
            with TowerContext('', is_training=False):
                config.tower_func(*placeholders.get_input_tensors())

            input_tensors = get_tensors_by_names(config.input_names)
            output_tensors = get_tensors_by_names(config.output_names)

            # The session initializer sets up its graph part before the
            # session is created, and runs on the session afterwards.
            initializer = config.session_init
            initializer._setup_graph()
            session = config.session_creator.create_session()
            initializer._run_init(session)

            super(OfflinePredictor, self).__init__(
                input_tensors, output_tensors, config.return_input, session)
Exemple #25
0
    def __init__(self, input, model):
        """Build the graph and define the inference (and optional MAP) train op.

        Args:
            input: Input source; must provide ``setup`` and
                ``get_input_tensors``.
            model: Model providing ``build_graph``, ``get_inputs_desc``,
                ``get_optimizer``, ``inf_loss``/``inf_vars`` and
                ``map_loss``/``map_vars``.
        """
        super(BNNTrainer, self).__init__()

        self.register_callback(input.setup(model.get_inputs_desc()))

        self.tower_func = TowerFuncWrapper(
            model.build_graph, model.get_inputs_desc())

        # The optimizer is obtained inside the training tower context.
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())
            optimizer = model.get_optimizer()

        with tf.name_scope('optimize'):
            train_step = optimizer.minimize(
                model.inf_loss, var_list=model.inf_vars, name='inf_op')
            # When there are MAP variables, the MAP step is created under a
            # control dependency on the inference step and becomes the train
            # op.
            if len(model.map_vars) > 0:
                with tf.control_dependencies([train_step]):
                    train_step = optimizer.minimize(
                        model.map_loss, var_list=model.map_vars, name='map_op')
        self.train_op = train_step
Exemple #26
0
    def __init__(self, input, model):
        """Build the GAN graph and define a train op with clipped gradients.

        Args:
            input: Input source; must provide ``setup`` and
                ``get_input_tensors``.
            model (GANModelDesc): The GAN model; also kept as ``self.model``.
        """
        super(GANTrainer, self).__init__()
        assert isinstance(model, GANModelDesc), model
        signature = model.get_inputs_desc()

        # Setup input
        self.register_callback(input.setup(signature))
        self.model = model

        # tower_func is set because this is a TowerTrainer, and only a
        # TowerTrainer supports automatic graph creation for inference during
        # training. If inference during training does not matter, tower_func
        # is not needed and calling model.build_graph directly is OK.
        self.tower_func = TowerFuncWrapper(model.build_graph, signature)
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())
        optimizer = model.get_optimizer()

        def clip(grads_and_vars):
            """Clip every gradient element-wise to the range [-5, 5]."""
            return [(tf.clip_by_value(grad, -5., 5.), var)
                    for grad, var in grads_and_vars]

        # One training iteration: by default, run one d_min after one g_min.
        with tf.name_scope('optimize'):
            generator_grads = clip(
                optimizer.compute_gradients(model.g_loss,
                                            var_list=model.g_vars))
            generator_step = optimizer.apply_gradients(generator_grads,
                                                       name='g_op')
            with tf.control_dependencies([generator_step]):
                discriminator_grads = clip(
                    optimizer.compute_gradients(model.d_loss,
                                                var_list=model.d_vars))
                discriminator_step = optimizer.apply_gradients(
                    discriminator_grads, name='d_op')

        self.train_op = discriminator_step
Exemple #27
0
    def train(self):

        # Create session
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True
        sess = tf.Session(config=tfconfig)

        losser = []

        with sess.graph.as_default():

            tf.set_random_seed(cfg.FLAGS.rng_seed)
            with TowerContext('', is_training=False):
                layers = self.net.create_architecture(sess,
                                                      "TRAIN",
                                                      self.imdb.num_classes,
                                                      tag='default')
            loss = layers['total_loss']
            lr = tf.Variable(cfg.FLAGS.learning_rate, trainable=False)
            momentum = cfg.FLAGS.momentum
            optimizer = tf.train.MomentumOptimizer(lr, momentum)
            # optimizer = tf.train.AdamOptimizer(lr)

            gvs = optimizer.compute_gradients(loss)

            # Double bias
            # Double the gradient of the bias if set
            if cfg.FLAGS.double_bias:
                final_gvs = []
                with tf.variable_scope('Gradient_Mult'):
                    for grad, var in gvs:
                        scale = 1.
                        if cfg.FLAGS.double_bias and '/biases:' in var.name:
                            scale *= 2.
                        if not np.allclose(scale, 1.0):
                            grad = tf.multiply(grad, scale)
                        final_gvs.append((grad, var))
                train_op = optimizer.apply_gradients(final_gvs)
            else:
                train_op = optimizer.apply_gradients(gvs)

            # We will handle the snapshots ourselves
            self.saver = tf.train.Saver(max_to_keep=100000)

            # Write the train and validation information to tensorboard

            #writer = tf.summary.FileWriter(self.tbdir, sess.graph)
            #valwriter = tf.summary.FileWriter(self.tbvaldir)

        # Load weights
        # Fresh train directly from ImageNet weights
        #print('Loading initial model weights from {:s}'.format(cfg.FLAGS.pretrained_model))
        variables = tf.global_variables()
        # Initialize all variables first
        #
        pretrained_model = r'F:\GhostNet\ghostnet\models\ghostnet_checkpoint'
        sess.run(tf.variables_initializer(variables, name='init'))
        var_keep_dic = self.get_variables_in_checkpoint_file(pretrained_model)
        #var_keep_dic = get_model_loader(pretrained_model)
        # Get the variables to restore, ignorizing the variables to fix
        variables_to_restore = self.net.get_variables_to_restore(
            variables, var_keep_dic)

        restorer = tf.train.Saver(variables_to_restore)
        restorer.restore(sess, pretrained_model)
        print('Loaded.')
        # Need to fix the variables before loading, so that the RGB weights are changed to BGR
        # For VGG16 it also changes the convolutional weights fc6 and fc7 to
        # fully connected weights
        self.net.fix_variables(sess, pretrained_model)
        print('Fixed.')
        sess.run(tf.assign(lr, cfg.FLAGS.learning_rate))
        last_snapshot_iter = 0

        timer = Timer()
        iter = last_snapshot_iter + 1
        last_summary_time = time.time()

        fig = plt.figure()
        arx = fig.add_subplot(1, 1, 1)

        while iter < cfg.FLAGS.max_iters + 1:
            # Learning rate
            if iter == cfg.FLAGS.step_size + 1:
                # Add snapshot here before reducing the learning rate
                # self.snapshot(sess, iter)
                sess.run(
                    tf.assign(lr, cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))

            timer.tic()
            # Get training data, one batch at a time
            blobs = self.data_layer.forward()

            # Compute the graph without summary
            rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(
                sess, blobs, train_op)
            timer.toc()
            iter += 1

            # Display training information
            if iter % (cfg.FLAGS.display) == 0:
                losser.append(total_loss)
                print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
                      '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
                      (iter, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
                print('speed: {:.3f}s / iter'.format(timer.average_time))

                #arx.cla()
                #arx.plot(losser,'bo-')
                #plt.pause(0.1)

            if iter % cfg.FLAGS.snapshot_iterations == 0:
                self.snapshot(sess, iter)
Exemple #28
0
    def _add_forward_graph(self, student=0.5):
        """Assemble the ShuffleNet-style backbone and the 'conv12' prediction head.

        The backbone is built with ``data_format='NCHW'``: ``self.image_input``
        is permuted with ``[0, 3, 1, 2]`` on the way in and the final feature
        map is permuted back with ``[0, 2, 3, 1]`` before dropout and the head.

        Args:
            student: multiplier applied to the stage widths (240/480/960) and
                to the widths of the two extra 'added3' blocks (960/768).

        The prediction tensor is stored on ``self.preds``; nothing is returned.
        """
        def channel_count(x):
            # Axis 1 is the channel axis inside the NCHW backbone.
            return x.get_shape().as_list()[1]

        def trunk(x, out_channel, last_channel, first_split, last_split,
                  group, stride):
            # conv1 -> channel shuffle -> dconv -> conv2: the part every unit
            # variant below has in common. Only the conv2 width and the two
            # `split` values differ between variants.
            narrow = out_channel // 4
            x = Conv2D('conv1',
                       x,
                       narrow,
                       kernel_shape=1,
                       split=first_split,
                       nl=BNReLU)
            x = channel_shuffle(x, group)
            x = DepthConv('dconv',
                          x,
                          narrow,
                          kernel_shape=3,
                          nl=BN,
                          stride=stride)
            return Conv2D('conv2',
                          x,
                          last_channel,
                          kernel_shape=1,
                          split=last_split,
                          nl=BN)

        def shufflenet_unit(l, out_channel, group, stride):
            # stride == 1: residual add; otherwise: concat with a pooled shortcut.
            in_channel = channel_count(l)
            first_split = 1 if in_channel == 24 else group
            if stride == 1:
                branch = trunk(l, out_channel, out_channel, first_split,
                               group, group, stride)
                return tf.nn.relu(l + branch)

            # The concat below re-attaches the pooled input channels, so conv2
            # only has to produce the remaining ones.
            branch = trunk(l, out_channel, out_channel - in_channel,
                           first_split, group, group, stride)
            pooled = AvgPooling('avgpool', l, 3, 2, padding='SAME')
            return tf.concat([pooled, tf.nn.relu(branch)], axis=1)

        def shufflenet_unit_add(l, out_channel, group, stride):
            # Always a residual add; conv2 reuses the first layer's split.
            first_split = 1 if channel_count(l) == 24 else group
            branch = trunk(l, out_channel, out_channel, first_split,
                           first_split, group, stride)
            return tf.nn.relu(l + branch)

        def shufflenet_unit_no_shortcut(l, out_channel, group, stride):
            # Same trunk, but the input is not merged back in.
            first_split = 1 if channel_count(l) == 24 else group
            branch = trunk(l, out_channel, out_channel, first_split,
                           first_split, group, stride)
            return tf.nn.relu(branch)

        mc = self.mc
        group = 3
        channels = [int(base * student) for base in (240, 480, 960)]
        # (variable scope, unit width, number of blocks) for each stage.
        stage_plan = (
            ('group1', channels[0], 4),
            ('group2', channels[1], 6),
            ('group3', channels[2], 4),
        )

        with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm], data_format='NCHW'), \
             argscope(Conv2D, use_bias=False), \
             TowerContext('', is_training=mc.IS_TRAINING):
            net = tf.transpose(self.image_input, [0, 3, 1, 2])
            net = Conv2D('conv1', net, 24, 3, stride=1, nl=BNReLU)
            net = MaxPooling('pool1', net, 3, 2, padding='SAME')

            for scope_name, width, repeats in stage_plan:
                with tf.variable_scope(scope_name):
                    for i in range(repeats):
                        with tf.variable_scope('block{}'.format(i)):
                            # Only the first block of a stage uses stride 2.
                            net = shufflenet_unit(net, width, group,
                                                  2 if i == 0 else 1)

            with tf.variable_scope('added3'):
                with tf.variable_scope('block{}'.format(0)):
                    net = shufflenet_unit_add(net, int(960 * student), 3, 1)
                with tf.variable_scope('block{}'.format(1)):
                    # 768, 384, 192
                    net = shufflenet_unit_no_shortcut(
                        net, int(768 * student), 3, 1)

        features = tf.transpose(net, [0, 2, 3, 1])
        dropout11 = tf.nn.dropout(features, self.keep_prob, name='drop11')

        num_output = mc.ANCHOR_PER_GRID * (mc.CLASSES + 1 + 4)

        self.preds = self._conv_layer_no_pretrain('conv12',
                                                  dropout11,
                                                  filters=num_output,
                                                  size=3,
                                                  stride=1,
                                                  padding='SAME',
                                                  xavier=False,
                                                  relu=False,
                                                  stddev=0.0001)
Exemple #29
0
    def _add_forward_graph(self, student=0.5):
        """Build the supervisor and student backbones, the mimic loss and the head.

        Two ShuffleNet-style backbones are built on the same dequeued image
        batch, both with ``data_format='NCHW'``:

        * a supervisor network under the 'shuffleDet_supervisor' variable
          scope, built with ``is_training=False`` and plain ``BN``/``BNReLU``;
        * a student network whose widths are scaled by ``student``, built with
          ``is_training=mc.IS_TRAINING`` and ``c_BN``/``c_BNReLU``.

        The student's last feature map goes through an 'adaptation' conv with
        768 filters, and the masked squared difference to the supervisor's last
        feature map becomes ``self.mimic_loss`` (added to the 'losses'
        collection). The 'conv12' head on the student features is stored on
        ``self.preds``.

        Args:
            student: multiplier applied to the student's stage widths
                (240/480/960) and 'added3' widths (960/768). It also selects
                the mimic-loss normalisation factor (2 for 0.5, otherwise 4).

        Raises:
            ValueError: if the current TF name scope does not end in digits,
                so no tower index can be derived from it.
        """
        self.image_input, self.input_mask, self.box_delta_input, \
        self.box_input, self.labels, self.mimic_mask, self.mimic_mask2 = self.batch_data_queue.dequeue()

        def channel_count(x):
            # Axis 1 is the channel axis inside the NCHW backbones.
            return x.get_shape().as_list()[1]

        def first_pointwise_split(x, group, stem_channel):
            # We do not apply group convolution on the first pointwise layer
            # because the number of input channels is relatively small.
            return 1 if channel_count(x) == stem_channel else group

        def trunk(x, out_channel, last_channel, first_split, last_split,
                  group, stride, bn_relu, bn):
            # conv1 -> channel shuffle -> dconv -> conv2: shared by all six
            # unit variants. `bn_relu` / `bn` are the `nl` callables, which
            # differ between the supervisor and the student.
            narrow = out_channel // 4
            x = Conv2D('conv1',
                       x,
                       narrow,
                       kernel_shape=1,
                       split=first_split,
                       nl=bn_relu)
            x = channel_shuffle(x, group)
            x = DepthConv('dconv',
                          x,
                          narrow,
                          kernel_shape=3,
                          nl=bn,
                          stride=stride)
            return Conv2D('conv2',
                          x,
                          last_channel,
                          kernel_shape=1,
                          split=last_split,
                          nl=bn)

        def merge_shortcut(shortcut, branch, stride):
            if stride == 1:  # unit (b)
                return tf.nn.relu(shortcut + branch)
            # unit (c)
            pooled = AvgPooling('avgpool', shortcut, 3, 2, padding='SAME')
            return tf.concat([pooled, tf.nn.relu(branch)], axis=1)

        def shufflenet_unit_supervisor(l, out_channel, group, stride):
            # The supervisor stem has 16 channels, and conv2 reuses the first
            # layer's split (the student unit uses `group` there instead).
            in_channel = channel_count(l)
            first_split = first_pointwise_split(l, group, 16)
            last_channel = (out_channel
                            if stride == 1 else out_channel - in_channel)
            branch = trunk(l, out_channel, last_channel, first_split,
                           first_split, group, stride, BNReLU, BN)
            return merge_shortcut(l, branch, stride)

        def shufflenet_unit_add_supervisor(l, out_channel, group, stride):
            first_split = first_pointwise_split(l, group, 24)
            branch = trunk(l, out_channel, out_channel, first_split,
                           first_split, group, stride, BNReLU, BN)
            return tf.nn.relu(l + branch)

        def shufflenet_unit_no_shortcut_supervisor(l, out_channel, group,
                                                   stride):
            first_split = first_pointwise_split(l, group, 24)
            branch = trunk(l, out_channel, out_channel, first_split,
                           first_split, group, stride, BNReLU, BN)
            return tf.nn.relu(branch)

        def shufflenet_unit(l, out_channel, group, stride):
            in_channel = channel_count(l)
            first_split = first_pointwise_split(l, group, 24)
            last_channel = (out_channel
                            if stride == 1 else out_channel - in_channel)
            branch = trunk(l, out_channel, last_channel, first_split, group,
                           group, stride, c_BNReLU, c_BN)
            return merge_shortcut(l, branch, stride)

        def shufflenet_unit_add(l, out_channel, group, stride):
            first_split = first_pointwise_split(l, group, 24)
            branch = trunk(l, out_channel, out_channel, first_split,
                           first_split, group, stride, c_BNReLU, c_BN)
            return tf.nn.relu(l + branch)

        def shufflenet_unit_no_shortcut(l, out_channel, group, stride):
            first_split = first_pointwise_split(l, group, 24)
            branch = trunk(l, out_channel, out_channel, first_split,
                           first_split, group, stride, c_BNReLU, c_BN)
            return tf.nn.relu(branch)

        def run_stages(x, channels, group, unit):
            # Three stages of 4 / 6 / 4 blocks; only the first block of each
            # stage uses stride 2.
            stage_plan = (
                ('group1', channels[0], 4),
                ('group2', channels[1], 6),
                ('group3', channels[2], 4),
            )
            for scope_name, width, repeats in stage_plan:
                with tf.variable_scope(scope_name):
                    for i in range(repeats):
                        with tf.variable_scope('block{}'.format(i)):
                            x = unit(x, width, group, 2 if i == 0 else 1)
            return x

        mc = self.mc

        # The tower index is encoded as the numeric suffix of the enclosing
        # name scope. Read *all* trailing digits: taking only the last
        # character would turn e.g. 'tower_10' into index 0 and wrongly mark
        # that tower as the main training tower.
        tower_scope = tf.get_default_graph().get_name_scope()
        trailing_digits = tower_scope[len(tower_scope.rstrip('0123456789')):]
        if not trailing_digits:
            raise ValueError(
                'Cannot infer tower index: name scope {!r} does not end in '
                'digits'.format(tower_scope))
        tower_index = int(trailing_digits)

        with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm], data_format='NCHW'), \
             argscope(Conv2D, use_bias=False):
            # Supervisor network.
            with TowerContext(tower_scope, is_training=False):
                with tf.variable_scope('shuffleDet_supervisor'):

                    group = 3
                    channels = [240, 480, 960]

                    l = tf.transpose(self.image_input, [0, 3, 1, 2])
                    l = Conv2D('conv1', l, 16, 3, stride=1, nl=BNReLU)
                    l = MaxPooling('pool1', l, 3, 2, padding='SAME')

                    l = run_stages(l, channels, group,
                                   shufflenet_unit_supervisor)

                    with tf.variable_scope('added3'):
                        with tf.variable_scope('block{}'.format(0)):
                            l = shufflenet_unit_add_supervisor(l, 960, 3, 1)
                        with tf.variable_scope('block{}'.format(1)):
                            l = shufflenet_unit_no_shortcut_supervisor(
                                l, 768, 3, 1)

                    supervisor_last_feature = tf.transpose(l, [0, 2, 3, 1])
                    self.inspect_last_feature = supervisor_last_feature

            # Student network.
            with argscope(c_batch_norm,
                          is_main_training_tower=tower_index == 0,
                          data_format='NCHW'):
                with TowerContext(tower_scope,
                                  is_training=mc.IS_TRAINING,
                                  index=tower_index):
                    group = 3
                    channels = [
                        int(240 * student),
                        int(480 * student),
                        int(960 * student)
                    ]
                    l = tf.transpose(self.image_input, [0, 3, 1, 2])
                    l = Conv2D('conv1', l, 24, 3, stride=1, nl=c_BNReLU)
                    l = MaxPooling('pool1', l, 3, 2, padding='SAME')

                    l = run_stages(l, channels, group, shufflenet_unit)

                    with tf.variable_scope('added3'):
                        with tf.variable_scope('block{}'.format(0)):
                            l = shufflenet_unit_add(l, int(960 * student), 3,
                                                    1)
                        with tf.variable_scope('block{}'.format(1)):
                            # 768, 384, 192
                            l = shufflenet_unit_no_shortcut(
                                l, int(768 * student), 3, 1)

                    student_feature = tf.transpose(l, [0, 2, 3, 1])

                    # Bring the student features to 768 channels, the width of
                    # the supervisor's last block, so the two feature maps can
                    # be subtracted in the mimic loss.
                    with tf.variable_scope('adaptation'):
                        student_adap = self._conv_layer_no_pretrain(
                            'conv',
                            student_feature,
                            filters=768,
                            size=3,
                            stride=1,
                            padding='SAME',
                            xavier=False,
                            relu=True,
                            stddev=0.0001)

        # Mimic loss between the supervisor and the adapted student features.
        with tf.variable_scope('mimic_loss'):
            mimic_mask = tf.cast(tf.expand_dims(self.mimic_mask, axis=-1),
                                 tf.float32)
            # this normalization is maybe too harsh
            # NOTE(review): if the mask sums to zero the division below has a
            # zero denominator -- confirm the input pipeline rules that out.
            if student == 0.5:
                normalization = tf.reduce_sum(mimic_mask) * 2.
            else:
                normalization = tf.reduce_sum(mimic_mask) * 4.

            self.mimic_loss = tf.div(
                tf.reduce_sum(
                    tf.square(supervisor_last_feature - student_adap) *
                    mimic_mask), normalization)
            if self.without_imitation:
                # Keep the op in the graph but zero out its contribution.
                self.mimic_loss = self.mimic_loss * 0.

            tf.add_to_collection('losses', self.mimic_loss)

        dropout11 = tf.nn.dropout(student_feature,
                                  self.keep_prob,
                                  name='drop11')

        num_output = mc.ANCHOR_PER_GRID * (mc.CLASSES + 1 + 4)
        self.preds = self._conv_layer_no_pretrain('conv12',
                                                  dropout11,
                                                  filters=num_output,
                                                  size=3,
                                                  stride=1,
                                                  padding='SAME',
                                                  xavier=False,
                                                  relu=False,
                                                  stddev=0.0001)
Exemple #30
0
             'our server and place them properly?').format(tfmodel + '.meta'))

    # set config
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True

    # init session
    sess = tf.Session(config=tfconfig)
    # load network
    if demonet == 'mobilenetv1':
        net = GhostNet(batch_size=1)
    # elif demonet == 'res101':
    # net = resnetv1(batch_size=1, num_layers=101)
    else:
        raise NotImplementedError
    with TowerContext('', is_training=False):
        net.create_architecture(sess,
                                "TEST",
                                classes_num,
                                tag='default',
                                anchor_scales=[8, 16, 32])
    saver = tf.train.Saver()
    saver.restore(sess, tfmodel)

    print('Loaded network {:s}'.format(tfmodel))

    #im_names = ['000001.jpg', '000002.jpg', '000003.jpg', '000004.jpg',
    #           '000005.jpg', '000006.jpg']

    path1 = input("请输入测试图片的路径:")
    im_names = os.listdir(path1)