    def init_graph(self):
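        """Build the training graph: a stack of 3x3 conv blocks followed by
        fully connected layers, a sigmoid multi-label head, Adam optimizer,
        saver and TensorBoard summaries."""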
        tf.reset_default_graph()
        batch_size = None

        # Input
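        # tf_x: NHWC image batch; tf_label: per-class targets for the sigmoid
        # (multi-label) head; is_training gates dropout at run time.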
        self.tf_x = tf.placeholder(tf.float32, shape=(batch_size, cfg.IMG_SIZE, cfg.IMG_SIZE, cfg.NUM_CHANNELS), name="tf_x")
        self.tf_label = tf.placeholder(tf.float32, shape=(batch_size, cfg.NUM_LABELS), name="tf_label")
        self.is_training = tf.placeholder(tf.bool, shape=None, name="is_training")

        def cnn_block(name, input, out_channels, stride, bn, pool, dropout):
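            """3x3 VALID convolution (+ optional batch norm), ReLU,
            optional 2x2 max pooling and optional dropout."""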
            with tf.variable_scope(name):
                in_channels = input.get_shape().as_list()[3]

                w = tf.get_variable(
                    name="w",
                    shape=[3, 3, in_channels, out_channels],
                    dtype=tf.float32)
                b = tf.get_variable("b", initializer=tf.zeros([out_channels], dtype=tf.float32), dtype=tf.float32)
                variable_summaries(w, w.name)
                variable_summaries(b, b.name)

                data = tf.nn.conv2d(input, w, [1, stride, stride, 1], padding='VALID') + b
                print_layer_info(name + "_conv_output", w, b, data)
                if bn:
                    data = tf.contrib.layers.batch_norm(data, center=False, scale=False, is_training=self.is_training, scope=name, decay=0.9)

                data = tf.nn.relu(data)

                if pool:
                    data = tf.nn.max_pool(data, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
                print_layer_info(name + "_pool_output", w, b, data)

                if dropout:
                    data = tf.layers.dropout(data, training=self.is_training, rate=self.conf["dropout_rate"])

                print_layer_info(name, w, b, data)
                return data

        def dense_block(name, input_, out_size, relu, dropout):
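            """Fully connected layer with optional ReLU and dropout."""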
            with tf.variable_scope(name):
                in_size = input_.get_shape().as_list()[1]

                w = tf.get_variable(
                    name="w",
                    shape=[in_size, out_size],
                    # initializer=tf.truncated_normal_initializer(stddev=0.1),
                    dtype=tf.float32
                )
                b = tf.get_variable(
                    name="b",
                    shape=[out_size],
                    # initializer=tf.constant_initializer(0.0),
                )
                variable_summaries(w, w.name)
                variable_summaries(b, b.name)

                data = tf.matmul(input_, w) + b
                if relu:
                    data = tf.nn.relu(data)
                if dropout:
                    data = tf.layers.dropout(data, training=self.is_training, rate=self.conf["dropout_rate"])

                print_layer_info(name, w, b, data)
                return data

        def print_layer_info(name, w, b, data):
            w_shape = w.get_shape().as_list()
            b_shape = b.get_shape().as_list()
            data_shape = data.get_shape().as_list()[1:]
            print("{}: {} params, w:{} + b:{}".format(
                name,
                np.prod(w_shape) + np.prod(b_shape),
                "*".join([str(val) for val in w_shape]),
                "*".join([str(val) for val in b_shape])))
            print("{}: {} activations, {} per example".format(
                name,
                np.prod(data_shape),
                "*".join([str(val) for val in data_shape])))

        # Net
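        # Six 3x3 conv blocks (VALID padding, batch norm disabled); 2x2 max
        # pooling after blocks 1, 3 and 5 halves the spatial resolution.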
        activations = cnn_block("cnn_1", self.tf_x, self.conf["cnn_1_out_channels"], stride=1, bn=False, pool=True, dropout=self.conf["cnn_1_dropout"])
        activations = cnn_block("cnn_2", activations, self.conf["cnn_2_out_channels"], stride=1, bn=False, pool=False, dropout=self.conf["cnn_2_dropout"])
        activations = cnn_block("cnn_3", activations, self.conf["cnn_3_out_channels"], stride=1, bn=False, pool=True, dropout=self.conf["cnn_3_dropout"])
        activations = cnn_block("cnn_4", activations, self.conf["cnn_4_out_channels"], stride=1, bn=False,  pool=False, dropout=self.conf["cnn_4_dropout"])
        activations = cnn_block("cnn_5", activations, self.conf["cnn_5_out_channels"], stride=1, bn=False, pool=True, dropout=self.conf["cnn_5_dropout"])
        activations = cnn_block("cnn_6", activations, self.conf["cnn_6_out_channels"], stride=1, bn=False,  pool=False, dropout=self.conf["cnn_6_dropout"])

        with tf.variable_scope("reshape"):
            shape = activations.get_shape().as_list()
            activations_size = shape[1] * shape[2] * shape[3]
            activations = tf.reshape(activations, [-1, activations_size])  # Unroll

        variable_summaries(activations, "after_cnn")

        activations = dense_block("fcn_1", activations, self.conf["fcn_1_out_size"], dropout=self.conf["fcn_1_dropout"], relu=True)
        activations = dense_block("fcn_2", activations, self.conf["fcn_2_out_size"], dropout=self.conf["fcn_2_dropout"], relu=True)
        logits = dense_block("fcn_3", activations, cfg.NUM_LABELS, dropout=False, relu=False)

        variable_summaries(logits, "logits")

        with tf.variable_scope("probability"):
            self.p = tf.nn.sigmoid(logits)
            self.p = smooth_p(self.p)

        # Loss
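        # Per-class sigmoid cross-entropy (independent binary targets),
        # averaged over the batch and over the classes.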
        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.tf_label, logits=logits))
            tf.summary.scalar("loss", self.loss)

        # Optimizer
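        # Any pending UPDATE_OPS (e.g. batch-norm moving averages, when used)
        # run before the Adam step; gradients are computed explicitly so they
        # can be logged as histograms.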
        with tf.variable_scope("optimizer"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                op = tf.train.AdamOptimizer(self.conf["learning_rate"])

                grads_and_vars = op.compute_gradients(self.loss, trainables)
                for grad, var in grads_and_vars:
                    if grad is not None:  # skip variables with no gradient w.r.t. the loss
                        tf.summary.histogram(grad.name, grad)
                variable_summaries(grads_and_vars[0][0], "grad_" + grads_and_vars[0][1].name)
                self.optimizer = op.apply_gradients(grads_and_vars)

        # Saver
        self.saver = tf.train.Saver()
        self.merged_summary_op = tf.summary.merge_all()
    def init_graph(self):
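        """Build the training graph: an Inception V4 (slim) feature extractor
        followed by optional dense layers, a sigmoid multi-label head, Adam
        optimizer, saver and TensorBoard summaries."""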
        tf.reset_default_graph()
        batch_size = None

        # Input
        self.tf_x = tf.placeholder(tf.float32, shape=(batch_size, cfg.IMG_SIZE, cfg.IMG_SIZE, cfg.NUM_CHANNELS), name="tf_x")
        self.tf_label = tf.placeholder(tf.float32, shape=(batch_size, cfg.NUM_LABELS), name="tf_label")
        self.is_training = tf.placeholder(tf.bool, shape=None, name="is_training")

        def dense_block(name, input_, out_size, bias, relu, dropout):
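            """Fully connected layer with optional bias, ReLU and dropout."""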
            with tf.variable_scope(name):
                in_size = input_.get_shape().as_list()[1]

                w = tf.get_variable(
                    name="w",
                    shape=[in_size, out_size],
                    # initializer=tf.truncated_normal_initializer(stddev=0.1),
                    dtype=tf.float32
                )
                variable_summaries(w, w.name)

                data = tf.matmul(input_, w)
                if bias:
                    b = tf.get_variable(
                        name="b",
                        shape=[out_size],
                        # initializer=tf.constant_initializer(0.0),
                    )
                    data += b
                    variable_summaries(b, b.name)
                if relu:
                    data = tf.nn.relu(data)
                if dropout:
                    data = tf.layers.dropout(data, training=self.is_training, rate=self.conf["dropout_rate"])

                # print_layer_info(name, w, b, data)
                return data

        def print_layer_info(name, w, b, data):
            w_shape = w.get_shape().as_list()
            b_shape = b.get_shape().as_list()
            data_shape = data.get_shape().as_list()[1:]
            print("{}: {} params, w:{} + b:{}".format(
                name,
                np.prod(w_shape) + np.prod(b_shape),
                "*".join([str(val) for val in w_shape]),
                "*".join([str(val) for val in b_shape])))
            print("{}: {} activations, {} per example".format(
                name,
                np.prod(data_shape),
                "*".join([str(val) for val in data_shape])))

        # Net
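        # With num_classes=None the slim Inception V4 is expected to skip its
        # logits (and aux-logits) head and return the global-pooled feature
        # map, hence the reshape below.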
        activations, endpoints = inception_v4(
            self.tf_x,
            num_classes=None,
            is_training=self.is_training,
            dropout_keep_prob=1.,  # Applies only if num_classes > 0
            reuse=None,
            scope='InceptionV4',
            create_aux_logits=True)

        if self.conf["inception_dropout"]:
            activations = tf.layers.dropout(activations, training=self.is_training, rate=self.conf["dropout_rate"])

        with tf.variable_scope("reshape"):
            shape = activations.get_shape().as_list()
            activations_size = shape[1] * shape[2] * shape[3]
            activations = tf.reshape(activations, [-1, activations_size])  # Unroll

        variable_summaries(activations, "after_cnn")

        if self.conf["fcn_1"]:
            activations = dense_block("fcn_1", activations, self.conf["fcn_1_out_size"], bias=True, dropout=self.conf["fcn_1_dropout"], relu=True)
        # activations = dense_block("fcn_2", activations, self.conf["fcn_2_out_size"], dropout=self.conf["fcn_2_dropout"], relu=True)
        logits = dense_block("fcn_3", activations, cfg.NUM_LABELS, bias=False, dropout=False, relu=False)

        variable_summaries(logits, "logits")

        with tf.variable_scope("probability"):
            self.p = tf.nn.sigmoid(logits)
            self.p = smooth_p(self.p)

        # Loss
        with tf.variable_scope("loss"):
            self.loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.tf_label, logits=logits))
            tf.summary.scalar("loss", self.loss)

        # Optimizer
        with tf.variable_scope("optimizer"):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                op = tf.train.AdamOptimizer(self.conf["learning_rate"])

                grads_and_vars = op.compute_gradients(self.loss, trainables)
                for grad, var in grads_and_vars:
                    if grad is not None:  # skip variables with no gradient w.r.t. the loss
                        tf.summary.histogram(grad.name, grad)
                variable_summaries(grads_and_vars[0][0], "grad_" + grads_and_vars[0][1].name)
                self.optimizer = op.apply_gradients(grads_and_vars)

        # Saver
        self.saver = tf.train.Saver()
        self.merged_summary_op = tf.summary.merge_all()