コード例 #1
0
ファイル: model.py プロジェクト: nghuyong/MTL-SLAM
    def cnn(self, name_scope, char_embedded):
        """Encode character embeddings with two stacked convolutions per filter size.

        For each entry of ``self.config.filter_sizes`` the input is passed
        through conv -> bias -> ReLU twice (each stage under its own variable
        scope), then max-pooled. The pooled maps of all filter sizes are
        concatenated on axis 3 and reshaped to
        ``[batch_size, -1, n_filter * len(filter_sizes)]``.

        Args:
            name_scope: Prefix for the per-filter-size variable scope names.
            char_embedded: Character embedding tensor; a trailing axis is
                appended to it before convolving.
                (assumes [N, char_max_len, char_embedding_dim] -- TODO confirm)

        Returns:
            The concatenated, reshaped convolutional character encoding.
        """
        cfg = self.config
        unit_strides = [1, 1, 1, 1]
        conv_input = tf.expand_dims(char_embedded, -1)

        branch_outputs = []
        for filter_size in cfg.filter_sizes:
            # First stage: the kernel spans the whole embedding dimension.
            with tf.variable_scope(f"{name_scope}_conv1_{filter_size}"):
                kernel = weight_variable(
                    shape=[filter_size, cfg.char_embedding_dim, 1, cfg.n_filter],
                    name='w_filter')
                bias = bias_variable(shape=[cfg.n_filter], name='beta_filter')
                pre_activation = tf.nn.conv2d(
                    conv_input, kernel, strides=unit_strides, padding="VALID",
                    name="conv")
                hidden = tf.nn.relu(tf.nn.bias_add(pre_activation, bias),
                                    name="relu")

            # Second stage: convolve the first-stage feature maps again.
            with tf.variable_scope(f"{name_scope}_conv2_{filter_size}"):
                kernel = weight_variable(
                    shape=[filter_size, 1, cfg.n_filter, cfg.n_filter],
                    name='w_filter')
                bias = bias_variable(shape=[cfg.n_filter], name='beta_filter')
                pre_activation = tf.nn.conv2d(
                    hidden, kernel, strides=unit_strides, padding="VALID",
                    name="conv")
                hidden = tf.nn.relu(tf.nn.bias_add(pre_activation, bias),
                                    name="relu")

            # Two VALID convolutions each shorten axis 1 by (filter_size - 1),
            # so this window covers everything that is left of that axis.
            pool_window = [1, cfg.char_max_len - filter_size * 2 + 2, 1, 1]
            branch_outputs.append(
                tf.nn.max_pool(hidden, ksize=pool_window, strides=unit_strides,
                               padding='VALID', name="pool"))

        merged = tf.concat(branch_outputs, 3)
        encoded_dim = cfg.n_filter * len(cfg.filter_sizes)
        return tf.reshape(merged, [cfg.batch_size, -1, encoded_dim])
コード例 #2
0
    def textcnn(self, X_inputs, n_step):
        """TextCNN model.

        Runs one bias-free convolution + ReLU + max-pool branch per entry of
        ``self.settings.filter_sizes`` and concatenates the pooled outputs.

        Args:
            X_inputs: Input tensor; a trailing axis is appended before
                convolving. Its last axis must match the kernel width
                ``hidden_size * 2 + embedding_dim``.
            n_step: Length of the axis being convolved over; used to size the
                pooling window so that it spans the whole convolution output.

        Returns:
            Tensor reshaped to ``[-1, self.n_filter_total]``.
        """
        cfg = self.settings
        unit_strides = [1, 1, 1, 1]
        conv_input = tf.expand_dims(X_inputs, -1)

        branch_outputs = []
        for filter_size in cfg.filter_sizes:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution layer: the kernel spans the full feature width.
                kernel_shape = [
                    filter_size,
                    cfg.hidden_size * 2 + cfg.embedding_dim,
                    1,
                    cfg.n_filter,
                ]
                kernel = weight_variable(shape=kernel_shape, name='W_filter')
                feature_map = tf.nn.conv2d(conv_input, kernel,
                                           strides=unit_strides,
                                           padding="VALID", name="conv")
                activated = tf.nn.relu(feature_map, name="relu")

                # Max-pool over every position the VALID convolution produced.
                pool_window = [1, n_step - filter_size + 1, 1, 1]
                branch_outputs.append(
                    tf.nn.max_pool(activated, ksize=pool_window,
                                   strides=unit_strides, padding='VALID',
                                   name="pool"))

        merged = tf.concat(branch_outputs, 3)
        # shape = [batch_size, self.n_filter_total]
        return tf.reshape(merged, [-1, self.n_filter_total])
コード例 #3
0
    def __init__(self, inputNode, dictionary, alpha, lr):
        """Build per-class sparse-coding ops against a fixed set of dictionaries.

        Args:
            inputNode: Rank-2 tensor; its static shape is unpacked as
                ``[batch, f]``.
            dictionary: Rank-3 tensor; its static shape is unpacked as
                ``[num_class, dict_size, f]``.
            alpha: Soft-threshold applied to the potential and weight of the
                L1 term in ``self.loss``; also scales the upper bound of the
                random potential initialiser.
            lr: Learning rate of the Adam optimizer that updates the potential.
        """
        batch, _ = inputNode.get_shape().as_list()
        num_class, dict_size, _ = dictionary.get_shape().as_list()

        act_dims = [num_class, batch, dict_size]
        per_class_axes = [1, 2]

        # Fresh uniform values used by `reset_potential`.
        self.potential_init = tf.random_uniform(act_dims,
                                                minval=0,
                                                maxval=1.05 * alpha,
                                                dtype=tf.float32)

        self.potential = utils.weight_variable(act_dims, "potential", 1e-3)
        self.activation = utils.weight_variable(act_dims, "activation", 1e-3)

        # Reconstruct the input from every class dictionary and compare it with
        # the input broadcast along a new leading axis.
        self.recon = tf.matmul(self.activation, dictionary)
        residual = tf.expand_dims(inputNode, 0) - self.recon

        # All statistics below are reduced over axes 1 and 2, leaving one value
        # per entry of the leading axis.
        squared_error = tf.reduce_sum(residual ** 2, axis=per_class_axes)
        self.recon_error = 0.5 * squared_error
        self.l1_sparsity = tf.reduce_sum(tf.abs(self.activation),
                                         axis=per_class_axes)
        self.nnz = tf.count_nonzero(
            self.activation, axis=per_class_axes) / (batch * dict_size)
        self.loss = self.recon_error + alpha * self.l1_sparsity

        # Soft-threshold the potential: keep the sign, shrink magnitude by alpha.
        sign = tf.sign(self.potential)
        shrunk_magnitude = tf.nn.relu(tf.abs(self.potential) - alpha)
        self.calc_activation = self.activation.assign(sign * shrunk_magnitude)

        self.reset_potential = self.potential.assign(self.potential_init)

        opt = tf.train.AdamOptimizer(lr)
        # Gradient of the reconstruction error with respect to the activation.
        recon_grad = opt.compute_gradients(self.recon_error, [self.activation])
        grad_wrt_activation, _ = recon_grad[0]
        # That gradient (plus the shrinkage term) is applied to the potential,
        # not to the activation it was computed for.
        shrinkage = self.potential - self.activation
        potential_update = [(grad_wrt_activation + shrinkage, self.potential)]
        self.train_step = opt.apply_gradients(potential_update)
コード例 #4
0
    def __init__(self,
                 inputNode,
                 l1_weight,
                 dict_size,
                 sc_lr,
                 dict_lr,
                 layer_type=None,
                 patch_size=None,
                 stride=None,
                 mask=None):
        """Build the graph for a single sparse-coding layer.

        Creates the dictionary, potential and activation variables, the
        reconstruction and sparsity terms, summary statistics, and the update
        ops for the potential and the dictionary. Tensors are stored in
        ``self.model``; ops are exposed as the attributes ``calc_activation``,
        ``reset_potential``, ``train_step``, ``reset_opt``, ``update_D`` and
        ``normalize_D``.

        Args:
            inputNode: Input tensor. Its static shape is unpacked as
                ``[batch, input_size, input_features]`` when it has rank 3,
                otherwise as ``[batch, input_features]``.
            l1_weight: Threshold subtracted from the potential before the ReLU.
                It also weights the L1 term of the loss (as ``0.5 * l1_weight``)
                and sets the range of the random potential re-initialisation.
            dict_size: Size of the last axis of both the dictionary and the
                activation.
            sc_lr: Learning rate of the Adam optimizer that updates the
                potential.
            dict_lr: Learning rate of the Adam optimizer that updates the
                dictionary.
            layer_type: ``"sc_fc"`` or ``"sc_conv"``. Must not be None; any
                other value reaches ``assert (0)``.
            patch_size: First axis of the dictionary on the non-``"sc_fc"``
                path.
            stride: Stride of the transposed convolution; ``input_size`` must
                be divisible by it.
            mask: Not referenced anywhere in this method.
        """
        curr_input = inputNode
        #Model variables and outputs
        self.model = {}

        assert (layer_type is not None)

        with tf.name_scope("lca_layer"):
            input_shape = curr_input.get_shape().as_list()

            # NOTE(review): input_size is only bound for rank-3 input, yet the
            # non-"sc_fc" branch below reads it; a rank-2 input on that path
            # would raise NameError.
            if (len(input_shape) == 3):
                [batch, input_size, input_features] = input_shape
            else:
                [batch, input_features] = input_shape

            if ("sc_fc" == layer_type):
                # Fully connected: flatten everything but the batch axis.
                curr_input = tf.reshape(curr_input, [batch, -1])
                input_features = curr_input.get_shape().as_list()[1]
                D_shape = [input_features, dict_size]
                act_shape = [batch, dict_size]
                reduce_axis = [1]
            else:
                # Any other layer_type takes this path; values other than
                # "sc_conv" are only rejected further down, at assert (0).
                D_shape = [patch_size, input_features, dict_size]
                assert (input_size % stride == 0)
                act_shape = [batch, input_size // stride, dict_size]
                reduce_axis = [1, 2]

            curr_dict = utils.l2_weight_variable(D_shape, "dictionary")
            curr_potential = utils.weight_variable(act_shape,
                                                   "potential",
                                                   std=1e-3)
            curr_activation = utils.weight_variable(act_shape,
                                                    "activation",
                                                    std=1e-3)

            # Reconstruct the input from the activation and the dictionary.
            if ("sc_fc" == layer_type):
                curr_recon = tf.matmul(curr_activation,
                                       curr_dict,
                                       transpose_b=True)
            elif ("sc_conv" == layer_type):
                curr_recon = tf.contrib.nn.conv1d_transpose(
                    curr_activation,
                    curr_dict, [batch, input_size, input_features],
                    stride,
                    padding='SAME')
            else:
                assert (0)

            # Both terms are summed over reduce_axis and then averaged over
            # the remaining (leading) axis.
            curr_error = curr_input - curr_recon
            curr_recon_error = 0.5 * tf.reduce_mean(
                tf.reduce_sum(curr_error**2, axis=reduce_axis))
            curr_l1_sparsity = tf.reduce_mean(
                tf.reduce_sum(tf.abs(curr_activation), axis=reduce_axis))
            curr_loss = curr_recon_error + 0.5 * l1_weight * curr_l1_sparsity

            self.model["error"] = curr_error
            self.model["recon_error"] = curr_recon_error
            self.model["potential"] = curr_potential
            self.model["activation"] = curr_activation
            self.model["recon"] = curr_recon
            self.model["l1_sparsity"] = curr_l1_sparsity
            self.model["loss"] = curr_loss

            #Ops
            # Activation is the potential thresholded at l1_weight (one-sided:
            # negative results are clipped to zero by the ReLU).
            calc_act = tf.nn.relu(curr_potential - l1_weight)
            self.calc_activation = curr_activation.assign(calc_act)

            # Re-initialise the potential uniformly in
            # [0.8 * l1_weight, 1.1 * l1_weight).
            low_init_val = .8 * l1_weight
            high_init_val = 1.1 * l1_weight
            potential_init = tf.random_uniform(act_shape,
                                               low_init_val,
                                               high_init_val,
                                               dtype=tf.float32)
            self.reset_potential = curr_potential.assign(potential_init)

            #Save all variables
            self.model["dictionary"] = curr_dict
            self.model["output"] = curr_activation
            self.model["input"] = curr_input

        with tf.name_scope("stats"):
            #Calculate stats
            # Total number of activation entries, used to turn the non-zero
            # count into a fraction.
            num_total_act = 1
            for s in act_shape:
                num_total_act *= s

            curr_nnz = tf.count_nonzero(curr_activation) / num_total_act

            #Calculate means/std of activations
            #Do this across batches
            #Normalize each feature/dictionary element individually
            # NOTE(review): tile_input is assigned in both branches but is not
            # read anywhere in this method.
            if (len(act_shape) == 3):
                moment_reduce_axis = [0, 1]
                tile_input = [act_shape[0], act_shape[1], 1]
            elif (len(act_shape) == 2):
                moment_reduce_axis = 0
                tile_input = [act_shape[0], 1]
            else:
                assert (0)

            act_norm = tf.norm(curr_activation,
                               axis=moment_reduce_axis,
                               keepdims=True)
            act_mean, act_var = tf.nn.moments(curr_activation,
                                              axes=moment_reduce_axis,
                                              keep_dims=True)
            act_std = tf.sqrt(act_var)
            act_max = tf.reduce_max(curr_activation)

            pot_norm = tf.norm(curr_potential,
                               axis=moment_reduce_axis,
                               keepdims=True)
            pot_mean, pot_var = tf.nn.moments(curr_potential,
                                              axes=moment_reduce_axis,
                                              keep_dims=True)
            pot_std = tf.sqrt(pot_var)

            # Unlike act_norm / pot_norm above, these two norms are computed
            # without keepdims.
            input_norm = tf.norm(curr_input, axis=moment_reduce_axis)
            output_norm = tf.norm(curr_activation, axis=moment_reduce_axis)

            self.model["nnz"] = curr_nnz
            self.model["act_norm"] = act_norm
            self.model["act_mean"] = act_mean
            self.model["act_std"] = act_std
            self.model["act_max"] = act_max
            self.model["pot_norm"] = pot_norm
            self.model["pot_mean"] = pot_mean
            self.model["pot_std"] = pot_std
            self.model["input_norm"] = input_norm
            self.model["output_norm"] = output_norm

        with tf.name_scope("optimizer"):
            #Define optimizer
            #TODO different learning rates?
            opt = tf.train.AdamOptimizer(sc_lr)

            #Calculate recon gradient wrt activation
            recon_grad = opt.compute_gradients(self.model["recon_error"],
                                               self.model["activation"])

            #Apply gradient (plus shrinkage) to potential
            #Needs to be a list of number of gradients, each element as a tuple of (gradient, wrt)

            # Only the gradient is used; the paired variable (the activation)
            # is replaced by the potential in d_potential below.
            (grad, var) = recon_grad[0]

            shrink_term = (1 / batch) * (self.model["potential"] -
                                         self.model["activation"])
            d_potential = [(grad + shrink_term, self.model["potential"])]

            self.train_step = opt.apply_gradients(d_potential)
            #Reset must be called after apply_gradients to define opt variables
            self.reset_opt = tf.group([v.initializer for v in opt.variables()])

            #Dictionary update variables
            # A separate Adam optimizer minimises the reconstruction error
            # with respect to the dictionary only.
            opt_D = tf.train.AdamOptimizer(dict_lr)
            self.update_D = opt_D.minimize(self.model["recon_error"],
                                           var_list=[self.model["dictionary"]])

            #Normalize D
            # The norm is reduced over every axis except the last, giving one
            # norm per index of the dictionary's last axis.
            curr_dict = self.model["dictionary"]
            dict_shape = curr_dict.get_shape().as_list()
            if (len(dict_shape) == 3):
                curr_norm = tf.norm(curr_dict, axis=(0, 1))
            elif (len(dict_shape) == 2):
                curr_norm = tf.norm(curr_dict, axis=0)
            else:
                assert (0)
            # NOTE(review): with the guard below commented out, nothing here
            # protects the division against a zero norm.
            #curr_norm = tf.maximum(tf.ones(dict_shape), curr_norm)
            self.normalize_D = curr_dict.assign(curr_dict / curr_norm)
コード例 #5
0
    def __init__(self):
        """Assemble the graph of the 'bigru' model.

        Title and detail token ids share one embedding matrix. Each is encoded
        by ``self.stack_bi_gru_layer`` followed by ``attention_layer`` in its
        own variable scope. The two encodings are concatenated and passed
        through a bias-free fully connected layer with ReLU and an output
        layer. Loss and train op come from ``add_loss`` and ``add_train_op``.

        The ``keep_prob`` placeholder is created here but is not referenced
        again within this method.
        """
        self.model_name = 'bigru'
        self.settings = BiGRUSetting()
        self.max_f1 = 0.0
        self.is_training = True

        cfg = self.settings
        # Width handed to the attention layer for each encoded sequence.
        gru_out_dim = cfg.bi_gru_hidden_dim * 2

        def encode(token_ids, lengths):
            """Embed ids, run the stacked bi-GRU, then the attention layer."""
            embedded = tf.nn.embedding_lookup(self.embedding, token_ids)
            gru_output = self.stack_bi_gru_layer(embedded, lengths)
            return attention_layer(gru_output, gru_out_dim)

        with tf.name_scope('Inputs'):
            self.title_input = tf.placeholder(
                tf.int64, [None, cfg.title_len], name='title_inputs')
            self.detail_input = tf.placeholder(
                tf.int64, [None, cfg.detail_len], name='detail_inputs')
            self.class_input = tf.placeholder(
                tf.float32, [None, cfg.class_num], name='class_input')
            self.title_length = tf.placeholder(
                tf.int64, [None], name='title_length')
            self.detail_length = tf.placeholder(
                tf.int64, [None], name='detail_length')
            self.keep_prob = tf.placeholder(tf.float32, [])

        # Embedding layer
        with tf.variable_scope('embedding'):
            self.embedding = tf.get_variable(
                name='embedding',
                shape=[cfg.voc_size, cfg.embedding_dim],
                initializer=tf.contrib.layers.xavier_initializer())

        # Stacked bi-GRU + attention layers
        with tf.variable_scope('bi_gru_title'):
            title_vector = encode(self.title_input, self.title_length)

        with tf.variable_scope('bi_gru_detail'):
            detail_vector = encode(self.detail_input, self.detail_length)

        # Fully connected layer
        with tf.variable_scope('fc'):
            merged = tf.concat([title_vector, detail_vector], axis=1)
            fc_weights = weight_variable(
                [cfg.bi_gru_hidden_dim * 4, cfg.fc_hidden_dim],
                name='Weight_fc')
            fc_linear = tf.matmul(merged, fc_weights, name='h_fc')
            fc_activated = tf.nn.relu(fc_linear, name="relu")

        # Output layer
        with tf.variable_scope('output'):
            out_weights = weight_variable(
                [cfg.fc_hidden_dim, cfg.class_num], name='Weight_out')
            out_bias = bias_variable([cfg.class_num], name='bias_out')
            self.y_pred = tf.nn.xw_plus_b(
                fc_activated, out_weights, out_bias, name='y_pred')
            self.sigmoid_y_pred = tf.nn.sigmoid(self.y_pred)

        # Loss
        with tf.variable_scope('loss'):
            self.loss = add_loss(self.y_pred, self.class_input)

        # Train
        with tf.variable_scope('training_ops'):
            self.train_op = add_train_op(lr=cfg.lr, loss=self.loss)

        self.saver = tf.train.Saver(max_to_keep=1, name=self.model_name)

        print(f'{self.model_name} init finish')
コード例 #6
0
    def __init__(self):
        """Assemble the graph of the 'rcnn' model.

        Title and detail token ids are each encoded by ``self.rcnn_layer`` in
        their own variable scope. The two encodings are concatenated and passed
        through a bias-free fully connected layer with ReLU and dropout, then
        an output layer. The loss is the mean sigmoid cross-entropy between the
        logits and ``class_input``.

        ``self.model_name`` and ``self.global_step`` are read but not set here;
        presumably the base-class initializer provides them -- verify.
        """
        super().__init__('rcnn')
        self.settings = RCNNSetting()
        cfg = self.settings
        self.n_filter_total = cfg.n_filter * len(cfg.filter_sizes)

        with tf.name_scope('Inputs'):
            self.title_input = tf.placeholder(
                tf.int64, [None, cfg.title_len], name='title_inputs')
            self.detail_input = tf.placeholder(
                tf.int64, [None, cfg.detail_len], name='detail_inputs')
            self.class_input = tf.placeholder(
                tf.float32, [None, cfg.class_num], name='class_input')
            self.title_length = tf.placeholder(
                tf.int64, [None], name='title_length')
            self.detail_length = tf.placeholder(
                tf.int64, [None], name='detail_length')
            self.keep_prob = tf.placeholder(tf.float32, [])

        # Embedding layer
        with tf.variable_scope('embedding'):
            self.embedding = tf.get_variable(
                name='embedding',
                shape=[cfg.voc_size, cfg.embedding_dim],
                initializer=tf.contrib.layers.xavier_initializer())

        # RCNN layers
        with tf.variable_scope('rcnn_text'):
            title_features = self.rcnn_layer(
                self.title_input, cfg.title_len, self.title_length)

        with tf.variable_scope('rcnn_content'):
            detail_features = self.rcnn_layer(
                self.detail_input, cfg.detail_len, self.detail_length)

        merged = tf.concat([title_features, detail_features], axis=1)

        # Fully connected layer
        with tf.variable_scope('fc_bn'):
            fc_weights = weight_variable(
                [self.n_filter_total * 2, cfg.fc_hidden_dim],
                name='Weight_fc')
            fc_linear = tf.matmul(merged, fc_weights, name='h_fc')
            fc_activated = tf.nn.relu(fc_linear, name="relu")
            fc_dropped = tf.nn.dropout(fc_activated, self.keep_prob)

        # Output layer
        with tf.variable_scope('output'):
            out_weights = weight_variable(
                [cfg.fc_hidden_dim, cfg.class_num], name='Weight_out')
            out_bias = bias_variable([cfg.class_num], name='bias_out')
            self.y_pred = tf.nn.xw_plus_b(
                fc_dropped, out_weights, out_bias, name='y_pred')
            self.sigmoid_y_pred = tf.nn.sigmoid(self.y_pred)

        # Loss
        with tf.variable_scope('loss'):
            per_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.y_pred, labels=self.class_input)
            self.loss = tf.reduce_mean(per_label_loss)

        # Train
        with tf.variable_scope('training_ops'):
            self.train_op = add_train_op(lr=cfg.lr,
                                         loss=self.loss,
                                         global_step=self.global_step)

        self.saver = tf.train.Saver(max_to_keep=1, name=self.model_name)

        print(f'{self.model_name} init finish')
コード例 #7
0
ファイル: lcaDeepSC.py プロジェクト: slundqui/TFSparseCode
    def __init__(self, inputNode, num_layers, l1_weight, dict_size, sc_lr, dict_lr, layer_type=None, patch_size=None, stride=None, mask=None, err_weight=None, act_weight=None, top_down_weight=None, normalize_act=None, inject_act_bool=None, inject_act=None):
        curr_input = inputNode
        #Model variables and outputs
        self.model = {}
        self.model["dictionary"] = []
        self.model["potential"] = []
        self.model["activation"] = []
        self.model["recon"] = []
        self.model["input"] = []
        self.model["output"] = []
        self.model["error"] = []
        self.model["recon_error"] = []
        self.model["l1_sparsity"] = []
        self.model["nnz"] = []
        self.model["loss"] = []
        self.model["act_norm"] = []
        self.model["act_mean"] = []
        self.model["act_std"] = []
        self.model["act_max"] = []
        self.model["pot_norm"] = []
        self.model["pot_mean"] = []
        self.model["pot_std"] = []
        self.model["input_norm"] = []
        self.model["output_norm"] = []

        assert(layer_type is not None)


        #Model operations
        self.calc_activation = []
        self.reset_potential = []

        switch_fc = False

        if(err_weight is None):
            err_weight = [1 for i in range(num_layers)]
        if(act_weight is None):
            act_weight = [1 for i in range(num_layers)]
        if(top_down_weight is None):
            top_down_weight = [1 for i in range(num_layers)]

        for l in range(num_layers):
            with tf.name_scope("lca_layer_"+str(l)):
                curr_layer_type = layer_type[l]
                curr_dict_size = dict_size[l]
                curr_stride = stride[l]
                curr_patch_size = patch_size[l]
                curr_l1_weight = l1_weight[l]
                curr_normalize = normalize_act[l]

                input_shape = curr_input.get_shape().as_list()

                if(len(input_shape) == 3):
                    [batch, input_size, input_features] = input_shape
                else:
                    [batch, input_features] = input_shape

                if("sc_fc" == curr_layer_type):
                    switch_fc = True
                    curr_input = tf.reshape(curr_input, [batch, -1])
                    input_features = curr_input.get_shape().as_list()[1]
                    D_shape = [input_features, curr_dict_size]
                    act_shape = [batch, curr_dict_size]
                    reduce_axis = [1]
                else:
                    assert(not switch_fc)
                    D_shape = [curr_patch_size, input_features, curr_dict_size]
                    assert(input_size % curr_stride == 0)
                    act_shape = [batch, input_size//curr_stride, curr_dict_size]
                    reduce_axis = [1, 2]

                curr_dict = utils.l2_weight_variable(D_shape, "dictionary"+str(l))

                curr_potential = utils.weight_variable(act_shape, "potential"+str(l), std=1e-3)
                curr_activation = utils.weight_variable(act_shape, "activation"+str(l), std=1e-3)

                if("sc_fc" == curr_layer_type):
                    curr_recon = tf.matmul(curr_activation, curr_dict, transpose_b=True)
                elif("sc_conv" == curr_layer_type):
                    curr_recon = tf.contrib.nn.conv1d_transpose(curr_activation, curr_dict, [batch, input_size, input_features], curr_stride, padding='SAME')
                else:
                    assert(0)

                curr_error = curr_input - curr_recon
                curr_recon_error = err_weight[l] * 0.5 * tf.reduce_mean(tf.reduce_sum(curr_error**2, axis=reduce_axis))
                curr_l1_sparsity = err_weight[l] * tf.reduce_mean(tf.reduce_sum(tf.abs(curr_activation), axis=reduce_axis))
                #curr_recon_error = err_weight[l] * 0.5 * tf.reduce_mean(curr_error**2)
                #curr_l1_sparsity = err_weight[l] * tf.reduce_mean(tf.abs(curr_activation))
                curr_loss = curr_recon_error + 0.5 * curr_l1_weight * curr_l1_sparsity

                self.model["error"].append(curr_error)
                self.model["recon_error"].append(curr_recon_error)
                self.model["potential"].append(curr_potential)
                self.model["activation"].append(curr_activation)
                self.model["recon"].append(curr_recon)
                self.model["l1_sparsity"].append(curr_l1_sparsity)
                self.model["loss"].append(curr_loss)

                #Ops
                #Use inject act if last layer for semi-supervised learning
                calc_act = tf.nn.relu(curr_potential - curr_l1_weight)
                if(l == num_layers - 1 and inject_act_bool is not None):
                    set_act = tf.where(inject_act_bool, inject_act, calc_act)
                    self.calc_activation.append(curr_activation.assign(set_act))
                else:
                    self.calc_activation.append(curr_activation.assign(calc_act))

                if(curr_l1_weight == 0):
                    low_init_val = -.1
                    high_init_val = .1
                else:
                    low_init_val = .8*curr_l1_weight
                    high_init_val  = 1.1*curr_l1_weight
                potential_init = tf.random_uniform(act_shape, low_init_val, high_init_val, dtype=tf.float32)
                self.reset_potential.append(curr_potential.assign(potential_init))

                num_total_act = 1
                for s in act_shape:
                    num_total_act *= s

                curr_nnz = tf.count_nonzero(curr_activation) / num_total_act

                #Save all variables
                self.model["dictionary"].append(curr_dict)
                self.model["output"].append(curr_activation)
                self.model["input"].append(curr_input)
                self.model["nnz"].append(curr_nnz)

                #Calculate means/std of activations
                #Do this across batches
                #Normalize each feature/dictionary element individually
                if(len(act_shape) == 3):
                    moment_reduce_axis = [0, 1]
                    tile_input = [act_shape[0], act_shape[1], 1]
                elif(len(act_shape) == 2):
                    moment_reduce_axis = 0
                    tile_input = [act_shape[0], 1]
                else:
                    assert(0)

                act_norm = tf.norm(curr_activation, axis=moment_reduce_axis, keepdims=True)
                act_mean, act_var = tf.nn.moments(curr_activation, axes=moment_reduce_axis, keep_dims=True)
                act_std = tf.sqrt(act_var)
                act_max = tf.reduce_max(curr_activation)

                pot_norm = tf.norm(curr_potential, axis=moment_reduce_axis, keepdims=True)
                pot_mean, pot_var = tf.nn.moments(curr_potential, axes=moment_reduce_axis, keep_dims=True)
                pot_std = tf.sqrt(pot_var)

                self.model["act_norm"].append(act_norm)
                self.model["act_mean"].append(act_mean)
                self.model["act_std"].append(act_std)
                self.model["act_max"].append(act_max)
                self.model["pot_norm"].append(pot_norm)
                self.model["pot_mean"].append(pot_mean)
                self.model["pot_std"].append(pot_std)

                input_norm = tf.norm(curr_input, axis=moment_reduce_axis)
                self.model["input_norm"].append(input_norm)

                if(curr_normalize):
                    #curr_input = ((curr_activation - act_mean)/(act_std+1e-8)) * act_weight[l]
                    curr_input = ((curr_potential - pot_mean)/(pot_std+1e-8)) * act_weight[l]
                else:
                    #curr_input = curr_activation * act_weight[l]
                    curr_input = curr_potential * act_weight[l]

                output_norm = tf.norm(curr_input, axis=moment_reduce_axis)
                self.model["output_norm"].append(output_norm)

                #Stop gradient, as we explcitly compute top down feedback
                curr_input = tf.stop_gradient(curr_input)

        with tf.name_scope("optimizer"):
            #Group ops
            self.calc_activation = tf.group(*self.calc_activation)
            self.reset_potential = tf.group(*self.reset_potential)

            #Define optimizer
            #TODO different learning rates?
            opt = tf.train.AdamOptimizer(sc_lr)

            total_recon_error = tf.reduce_sum(self.model["recon_error"])
            self.model["total_recon_error"] = total_recon_error

            #Calculate recon gradient wrt activation
            recon_grad = opt.compute_gradients(total_recon_error, self.model["activation"])

            #Apply gradient (plus shrinkage) to potential
            #Needs to be a list of number of gradients, each element as a tuple of (gradient, wrt)

            d_potential = []
            for i, (grad, var) in enumerate(recon_grad):
                shrink_term = err_weight[i] * (1/batch) * (self.model["potential"][i] - self.model["activation"][i])
                #The top down term doesn't exist with the recon loss as written, since potential
                #isnt connected to the total recon loss
                if(i < (num_layers - 1)):
                    top_down_term = top_down_weight[i] * self.model["error"][i+1]
                else:
                    top_down_term = 0
                d_potential.append((grad + shrink_term - top_down_term, self.model["potential"][i]))

            self.train_step = opt.apply_gradients(d_potential)
            #Reset must be called after apply_gradients to define opt variables
            self.reset_opt = tf.group([v.initializer for v in opt.variables()])

            #Dictionary update variables
            opt_D = tf.train.AdamOptimizer(dict_lr)
            self.update_D = opt_D.minimize(total_recon_error, var_list=[self.model["dictionary"]])

            #Normalize D
            self.normalize_D = []
            for l in range(num_layers):
                curr_dict = self.model["dictionary"][l]
                dict_shape = curr_dict.get_shape().as_list()
                if(len(dict_shape) == 3):
                    curr_norm = tf.norm(curr_dict, axis=(0, 1))
                else:
                    curr_norm = tf.norm(curr_dict, axis=0)
                #curr_norm = tf.maximum(tf.ones(dict_shape), curr_norm)
                self.normalize_D.append(curr_dict.assign(curr_dict/curr_norm))
            self.normalize_D = tf.group(*self.normalize_D)

        with tf.name_scope("weight_recon"):
            #Allows calculating reconstruction from each layer
            layer_weights = []
            for l in range(num_layers):
                recon_l_fc = ("sc_fc" == layer_type[l])
                recon_l_num_dict = dict_size[l]
                recon_act = tf.eye(recon_l_num_dict)
                if(not recon_l_fc):
                    recon_act = recon_act[:, tf.newaxis, :]

                switch_conv = not recon_l_fc

                curr_act = recon_act
                curr_pot = None
                for ll in reversed(range(l+1)):
                    curr_dict = self.model["dictionary"][ll]
                    curr_layer_type = layer_type[ll]
                    curr_stride = stride[ll]
                    curr_patch_size = patch_size[ll]
                    curr_l1_weight = l1_weight[ll]
                    curr_normalize = normalize_act[ll]

                    #Find activity given potential
                    #Don't normalize layer we're visualizing
                    if(ll != l):
                        #Normalize the potential and calculate next activity
                        if(curr_normalize):
                            curr_pot = (curr_pot/act_weight[ll]) * (self.model["pot_std"][ll] + 1e-8) + self.model["pot_mean"][ll]
                        else:
                            curr_pot = curr_pot/act_weight[ll]
                        curr_act = tf.nn.relu(curr_pot - curr_l1_weight)

                    #Reshape if needed (fc -> conv layer)
                    if("sc_conv" == curr_layer_type):
                        input_shape = curr_act.get_shape().as_list()
                        if(not switch_conv):
                            switch_conv = True
                            input_shape = self.model["output"][ll].get_shape().as_list()
                            input_shape[0] = recon_l_num_dict
                            curr_act = tf.reshape(curr_act, input_shape)

                    #Reconstruct given the activity
                    if("sc_fc" == curr_layer_type):
                        curr_pot = tf.matmul(curr_act, curr_dict, transpose_b=True)
                    else:
                        if(recon_l_fc):
                            if(ll == 0):
                                output_shape = inputNode.get_shape().as_list()
                            else:
                                output_shape = self.model["output"][ll-1].get_shape().as_list()
                            output_shape[0] = recon_l_num_dict
                        else:
                            num_x = input_shape[1]
                            num_out_x = curr_patch_size + ((num_x-1) * curr_stride)
                            if(ll == 0):
                                output_features = inputNode.get_shape().as_list()[-1]
                            else:
                                output_features = self.model["output"][ll-1].get_shape().as_list()[-1]
                            output_shape = [recon_l_num_dict, num_out_x, output_features]
                        if(recon_l_fc):
                            padding='SAME'
                        else:
                            padding='VALID'

                        curr_pot = tf.contrib.nn.conv1d_transpose(curr_act, curr_dict, output_shape, curr_stride, padding=padding)


                layer_weights.append(curr_pot)
            self.model["layer_weights"] = layer_weights
コード例 #8
0
ファイル: lcaDeepSC.py プロジェクト: zengxi77/TFSparseCode
    def __init__(self,
                 inputNode,
                 num_layers,
                 l1_weight,
                 dict_size,
                 sc_lr,
                 dict_lr,
                 layer_type=None,
                 patch_size=None,
                 stride=None,
                 mask=None,
                 err_weight=None,
                 act_weight=None,
                 top_down_weight=None,
                 normalize_act=None,
                 inject_act_bool=None,
                 inject_act=None):
        """Build the graph of a stacked (deep) sparse coding model.

        Every layer owns a dictionary, a potential variable and an activation
        variable. The activation is relu(potential - l1_weight); the layer
        reconstructs its input from the activation and the dictionary. The
        (optionally standardized) potential of a layer, scaled by act_weight
        and wrapped in stop_gradient, is the input of the next layer.

        Args:
            inputNode: Input tensor with a fully defined static shape, either
                [batch, input_size, input_features] or [batch, input_features].
            num_layers: Number of layers to build.
            l1_weight: Per-layer threshold subtracted from the potential; also
                weights the sparsity term of the per-layer loss.
            dict_size: Per-layer number of dictionary elements.
            sc_lr: Learning rate of the Adam optimizer that updates potentials.
            dict_lr: Learning rate of the Adam optimizer that updates the
                dictionaries.
            layer_type: Per-layer type, "sc_fc" or "sc_conv". Required. No
                "sc_conv" layer may follow an "sc_fc" layer.
            patch_size: Per-layer patch size; only read for "sc_conv" layers.
            stride: Per-layer stride; only read for "sc_conv" layers.
            mask: Accepted but not used by this constructor.
            err_weight: Per-layer weight of the reconstruction and sparsity
                terms. Defaults to 1 for every layer.
            act_weight: Per-layer scale applied to a layer's output before it
                is fed to the next layer. Defaults to 1 for every layer.
            top_down_weight: Per-layer weight of the feedback term coming from
                the layer above. Defaults to 1 for every layer.
            normalize_act: Per-layer flag; when true the potential is
                standardized (mean/std over the batch axes) before being
                passed on. Defaults to False for every layer.
            inject_act_bool: If not None, boolean tensor used for the last
                layer to select between inject_act and the computed
                activation (via tf.where).
            inject_act: Activation values injected where inject_act_bool is
                true.

        Attributes created:
            model: dict of per-layer lists of tensors/variables, plus
                "total_recon_error" and "layer_weights".
            calc_activation, reset_potential, train_step, reset_opt,
            update_D, normalize_D: ops to run the model.
        """
        curr_input = inputNode
        #Model variables and outputs
        self.model = {}
        self.model["dictionary"] = []
        self.model["potential"] = []
        self.model["activation"] = []
        self.model["recon"] = []
        self.model["input"] = []
        self.model["output"] = []
        self.model["error"] = []
        self.model["recon_error"] = []
        self.model["l1_sparsity"] = []
        self.model["nnz"] = []
        self.model["loss"] = []
        self.model["act_norm"] = []
        self.model["act_mean"] = []
        self.model["act_std"] = []
        self.model["act_max"] = []
        self.model["pot_norm"] = []
        self.model["pot_mean"] = []
        self.model["pot_std"] = []
        self.model["input_norm"] = []
        self.model["output_norm"] = []

        assert (layer_type is not None)

        #Model operations
        self.calc_activation = []
        self.reset_potential = []

        #Set once a fully connected layer has been built; conv layers are not
        #allowed afterwards
        switch_fc = False

        if (err_weight is None):
            err_weight = [1 for i in range(num_layers)]
        if (act_weight is None):
            act_weight = [1 for i in range(num_layers)]
        if (top_down_weight is None):
            top_down_weight = [1 for i in range(num_layers)]
        #The remaining per-layer lists are indexed unconditionally below, so a
        #None default must be expanded to a list as well
        if (normalize_act is None):
            normalize_act = [False for i in range(num_layers)]
        #patch_size/stride are only consumed by conv layers
        if (patch_size is None):
            patch_size = [None for i in range(num_layers)]
        if (stride is None):
            stride = [None for i in range(num_layers)]

        for l in range(num_layers):
            with tf.name_scope("lca_layer_" + str(l)):
                curr_layer_type = layer_type[l]
                curr_dict_size = dict_size[l]
                curr_stride = stride[l]
                curr_patch_size = patch_size[l]
                curr_l1_weight = l1_weight[l]
                curr_normalize = normalize_act[l]

                input_shape = curr_input.get_shape().as_list()

                if (len(input_shape) == 3):
                    [batch, input_size, input_features] = input_shape
                else:
                    [batch, input_features] = input_shape

                if ("sc_fc" == curr_layer_type):
                    switch_fc = True
                    #Flatten everything but the batch axis
                    curr_input = tf.reshape(curr_input, [batch, -1])
                    input_features = curr_input.get_shape().as_list()[1]
                    D_shape = [input_features, curr_dict_size]
                    act_shape = [batch, curr_dict_size]
                    reduce_axis = [1]
                else:
                    assert (not switch_fc)
                    D_shape = [curr_patch_size, input_features, curr_dict_size]
                    assert (input_size % curr_stride == 0)
                    act_shape = [
                        batch, input_size // curr_stride, curr_dict_size
                    ]
                    reduce_axis = [1, 2]

                curr_dict = utils.l2_weight_variable(D_shape,
                                                     "dictionary" + str(l))

                curr_potential = utils.weight_variable(act_shape,
                                                       "potential" + str(l),
                                                       std=1e-3)
                curr_activation = utils.weight_variable(act_shape,
                                                        "activation" + str(l),
                                                        std=1e-3)

                #Reconstruct the layer input from activation and dictionary
                if ("sc_fc" == curr_layer_type):
                    curr_recon = tf.matmul(curr_activation,
                                           curr_dict,
                                           transpose_b=True)
                elif ("sc_conv" == curr_layer_type):
                    curr_recon = tf.contrib.nn.conv1d_transpose(
                        curr_activation,
                        curr_dict, [batch, input_size, input_features],
                        curr_stride,
                        padding='SAME')
                else:
                    assert (0)

                curr_error = curr_input - curr_recon
                #Sum over the non-batch axes, then average over the batch
                curr_recon_error = err_weight[l] * 0.5 * tf.reduce_mean(
                    tf.reduce_sum(curr_error**2, axis=reduce_axis))
                curr_l1_sparsity = err_weight[l] * tf.reduce_mean(
                    tf.reduce_sum(tf.abs(curr_activation), axis=reduce_axis))
                curr_loss = curr_recon_error + 0.5 * curr_l1_weight * curr_l1_sparsity

                self.model["error"].append(curr_error)
                self.model["recon_error"].append(curr_recon_error)
                self.model["potential"].append(curr_potential)
                self.model["activation"].append(curr_activation)
                self.model["recon"].append(curr_recon)
                self.model["l1_sparsity"].append(curr_l1_sparsity)
                self.model["loss"].append(curr_loss)

                #Ops
                #Use inject act if last layer for semi-supervised learning
                calc_act = tf.nn.relu(curr_potential - curr_l1_weight)
                if (l == num_layers - 1 and inject_act_bool is not None):
                    set_act = tf.where(inject_act_bool, inject_act, calc_act)
                    self.calc_activation.append(
                        curr_activation.assign(set_act))
                else:
                    self.calc_activation.append(
                        curr_activation.assign(calc_act))

                #Range of the uniform distribution used to re-initialize the
                #potential
                if (curr_l1_weight == 0):
                    low_init_val = -.1
                    high_init_val = .1
                else:
                    low_init_val = .8 * curr_l1_weight
                    high_init_val = 1.1 * curr_l1_weight
                potential_init = tf.random_uniform(act_shape,
                                                   low_init_val,
                                                   high_init_val,
                                                   dtype=tf.float32)
                self.reset_potential.append(
                    curr_potential.assign(potential_init))

                #Fraction of non-zero activations
                num_total_act = 1
                for s in act_shape:
                    num_total_act *= s

                curr_nnz = tf.count_nonzero(curr_activation) / num_total_act

                #Save all variables
                self.model["dictionary"].append(curr_dict)
                self.model["output"].append(curr_activation)
                self.model["input"].append(curr_input)
                self.model["nnz"].append(curr_nnz)

                #Calculate means/std of activations
                #Do this across batches
                #Normalize each feature/dictionary element individually
                if (len(act_shape) == 3):
                    moment_reduce_axis = [0, 1]
                elif (len(act_shape) == 2):
                    moment_reduce_axis = 0
                else:
                    assert (0)

                act_norm = tf.norm(curr_activation,
                                   axis=moment_reduce_axis,
                                   keepdims=True)
                act_mean, act_var = tf.nn.moments(curr_activation,
                                                  axes=moment_reduce_axis,
                                                  keep_dims=True)
                act_std = tf.sqrt(act_var)
                act_max = tf.reduce_max(curr_activation)

                pot_norm = tf.norm(curr_potential,
                                   axis=moment_reduce_axis,
                                   keepdims=True)
                pot_mean, pot_var = tf.nn.moments(curr_potential,
                                                  axes=moment_reduce_axis,
                                                  keep_dims=True)
                pot_std = tf.sqrt(pot_var)

                self.model["act_norm"].append(act_norm)
                self.model["act_mean"].append(act_mean)
                self.model["act_std"].append(act_std)
                self.model["act_max"].append(act_max)
                self.model["pot_norm"].append(pot_norm)
                self.model["pot_mean"].append(pot_mean)
                self.model["pot_std"].append(pot_std)

                input_norm = tf.norm(curr_input, axis=moment_reduce_axis)
                self.model["input_norm"].append(input_norm)

                #The next layer is driven by the potential (not the
                #thresholded activation), optionally standardized
                if (curr_normalize):
                    curr_input = ((curr_potential - pot_mean) /
                                  (pot_std + 1e-8)) * act_weight[l]
                else:
                    curr_input = curr_potential * act_weight[l]

                output_norm = tf.norm(curr_input, axis=moment_reduce_axis)
                self.model["output_norm"].append(output_norm)

                #Stop gradient, as we explicitly compute top down feedback
                curr_input = tf.stop_gradient(curr_input)

        with tf.name_scope("optimizer"):
            #Group ops
            self.calc_activation = tf.group(*self.calc_activation)
            self.reset_potential = tf.group(*self.reset_potential)

            #Define optimizer
            #TODO different learning rates?
            opt = tf.train.AdamOptimizer(sc_lr)

            total_recon_error = tf.reduce_sum(self.model["recon_error"])
            self.model["total_recon_error"] = total_recon_error

            #Calculate recon gradient wrt activation
            recon_grad = opt.compute_gradients(total_recon_error,
                                               self.model["activation"])

            #Apply gradient (plus shrinkage) to potential
            #Needs to be a list of number of gradients, each element as a tuple of (gradient, wrt)

            d_potential = []
            for i, (grad, var) in enumerate(recon_grad):
                #1.0 keeps this a true division even under Python 2, where
                #1 / batch would truncate to 0 and silently drop the term
                shrink_term = err_weight[i] * (1.0 / batch) * (
                    self.model["potential"][i] - self.model["activation"][i])
                #The top down term doesn't exist with the recon loss as written, since potential
                #isnt connected to the total recon loss
                if (i < (num_layers - 1)):
                    #The error of the layer above lives in that layer's input
                    #space, which is flattened when that layer is fully
                    #connected. Bring it back to the shape of this potential
                    #so the terms line up elementwise (no-op if shapes match).
                    top_down_error = tf.reshape(
                        self.model["error"][i + 1],
                        self.model["potential"][i].get_shape().as_list())
                    top_down_term = top_down_weight[i] * top_down_error
                else:
                    top_down_term = 0
                d_potential.append((grad + shrink_term - top_down_term,
                                    self.model["potential"][i]))

            self.train_step = opt.apply_gradients(d_potential)
            #Reset must be called after apply_gradients to define opt variables
            self.reset_opt = tf.group([v.initializer for v in opt.variables()])

            #Dictionary update variables
            opt_D = tf.train.AdamOptimizer(dict_lr)
            self.update_D = opt_D.minimize(total_recon_error,
                                           var_list=[self.model["dictionary"]])

            #Normalize D
            #Each dictionary element is divided by its l2 norm, taken over all
            #axes but the last
            self.normalize_D = []
            for l in range(num_layers):
                curr_dict = self.model["dictionary"][l]
                dict_shape = curr_dict.get_shape().as_list()
                if (len(dict_shape) == 3):
                    curr_norm = tf.norm(curr_dict, axis=(0, 1))
                else:
                    curr_norm = tf.norm(curr_dict, axis=0)
                self.normalize_D.append(curr_dict.assign(curr_dict /
                                                         curr_norm))
            self.normalize_D = tf.group(*self.normalize_D)

        with tf.name_scope("weight_recon"):
            #Allows calculating reconstruction from each layer
            #For layer l, an identity activation (one row per dictionary
            #element) is pushed down through layers l, l-1, ..., 0
            layer_weights = []
            for l in range(num_layers):
                recon_l_fc = ("sc_fc" == layer_type[l])
                recon_l_num_dict = dict_size[l]
                recon_act = tf.eye(recon_l_num_dict)
                if (not recon_l_fc):
                    #Conv layers expect [batch, x, features]
                    recon_act = recon_act[:, tf.newaxis, :]

                #True once the signal is in conv (3D) layout
                switch_conv = not recon_l_fc

                curr_act = recon_act
                curr_pot = None
                for ll in reversed(range(l + 1)):
                    curr_dict = self.model["dictionary"][ll]
                    curr_layer_type = layer_type[ll]
                    curr_stride = stride[ll]
                    curr_patch_size = patch_size[ll]
                    curr_l1_weight = l1_weight[ll]
                    curr_normalize = normalize_act[ll]

                    #Find activity given potential
                    #Don't normalize layer we're visualizing
                    if (ll != l):
                        #Undo the normalization applied on the way up and
                        #calculate next activity
                        if (curr_normalize):
                            curr_pot = (curr_pot / act_weight[ll]) * (
                                self.model["pot_std"][ll] +
                                1e-8) + self.model["pot_mean"][ll]
                        else:
                            curr_pot = curr_pot / act_weight[ll]
                        curr_act = tf.nn.relu(curr_pot - curr_l1_weight)

                    #Reshape if needed (fc -> conv layer)
                    if ("sc_conv" == curr_layer_type):
                        input_shape = curr_act.get_shape().as_list()
                        if (not switch_conv):
                            switch_conv = True
                            input_shape = self.model["output"][ll].get_shape(
                            ).as_list()
                            input_shape[0] = recon_l_num_dict
                            curr_act = tf.reshape(curr_act, input_shape)

                    #Reconstruct given the activity
                    if ("sc_fc" == curr_layer_type):
                        curr_pot = tf.matmul(curr_act,
                                             curr_dict,
                                             transpose_b=True)
                    else:
                        if (recon_l_fc):
                            #Started from a fc layer: reconstruct the full
                            #extent of the layer below
                            if (ll == 0):
                                output_shape = inputNode.get_shape().as_list()
                            else:
                                output_shape = self.model["output"][
                                    ll - 1].get_shape().as_list()
                            output_shape[0] = recon_l_num_dict
                        else:
                            #Started from a conv layer: output only grows by
                            #the extent of the patch
                            num_x = input_shape[1]
                            num_out_x = curr_patch_size + (
                                (num_x - 1) * curr_stride)
                            if (ll == 0):
                                output_features = inputNode.get_shape(
                                ).as_list()[-1]
                            else:
                                output_features = self.model["output"][
                                    ll - 1].get_shape().as_list()[-1]
                            output_shape = [
                                recon_l_num_dict, num_out_x, output_features
                            ]
                        if (recon_l_fc):
                            padding = 'SAME'
                        else:
                            padding = 'VALID'

                        curr_pot = tf.contrib.nn.conv1d_transpose(
                            curr_act,
                            curr_dict,
                            output_shape,
                            curr_stride,
                            padding=padding)

                layer_weights.append(curr_pot)
            self.model["layer_weights"] = layer_weights
コード例 #9
0
    def buildModel(self):
        """Build the graph of the supervised sparse coding classifier.

        Creates one dictionary (self.D) and one set of classification
        weights (self.W) per class, runs sparse coding on the normalized
        input (self.scObj), computes a per-class feed-forward score and
        defines the manual update ops for W and D.

        Sets self.input, self.labels, self.norm_input, self.est_labels,
        self.injectBool, self.injectAcc, self.update_W, self.update_D,
        self.normalize_D and self.update_step, and registers tensors in
        self.varDict, self.imageDict and self.scalarDict.
        """
        with tf.device(self.params.device):
            with tf.name_scope("Variables"):
                #Dictionary elements
                D_shape = [
                    self.params.num_classes, self.params.dict_size,
                    self.params.num_features
                ]
                if (self.params.init_weights is None):
                    self.D = utils.l2_weight_variable(D_shape, "dictionary")
                else:
                    #A 2D init is shared by (tiled across) all classes
                    if (len(self.params.init_weights.shape) == 2):
                        init_weights = self.params.init_weights[np.newaxis,
                                                                ...]
                        init_weights = np.tile(init_weights,
                                               [self.params.num_classes, 1, 1])
                    else:
                        init_weights = self.params.init_weights
                    self.D = tf.Variable(init_weights.astype(np.float32),
                                         name="dictionary")

                #Binary classification
                W_shape = [
                    self.params.num_classes, self.params.dict_size,
                    self.params.num_features
                ]
                self.W = utils.weight_variable(W_shape, "class_weights")

                self.input = tf.placeholder(
                    tf.float32,
                    shape=[self.params.batch_size, self.params.num_features],
                    name="input")
                self.labels = tf.placeholder(tf.int64,
                                             shape=[self.params.batch_size],
                                             name="labels")

                #Subtract the per-example mean and divide by the per-example
                #l2 norm of the raw input
                self.norm_input = (self.input - tf.reduce_mean(
                    self.input, axis=1, keepdims=True)) / tf.norm(
                        self.input, axis=1, keepdims=True)

                #Set binary labels for each class
                #Shape is [num_classes, batch_size] after the transpose
                onehot_labels = tf.transpose(
                    tf.one_hot(self.labels, self.params.num_classes), [1, 0])
                #go from [0, 1] to [-1, 1]
                onehot_labels = onehot_labels * 2 - 1

                #Add to tensorboard
                self.varDict["D"] = self.D
                self.varDict["W"] = self.W
                self.varDict["labels"] = self.labels
                self.varDict["onehot_labels"] = onehot_labels
                if (self.params.image_shape is not None):
                    reshape_image = tf.reshape(self.norm_input,
                                               (self.params.batch_size, ) +
                                               self.params.image_shape)
                    self.imageDict["norm_image"] = reshape_image
                else:
                    self.varDict["norm_input"] = self.norm_input

            with tf.name_scope("SC"):
                self.scObj = lcaSC(self.norm_input, self.D,
                                   self.params.l1_weight, self.params.sc_lr)
                #presumably [num_classes, batch_size, dict_size] - confirm
                #against lcaSC
                sc_activation = self.scObj.activation

                self.varDict["sc_activation"] = sc_activation
                self.scalarDict["sc_recon_err"] = tf.reduce_mean(
                    self.scObj.recon_error)
                self.scalarDict["sc_l1_sparsity"] = tf.reduce_mean(
                    self.scObj.l1_sparsity)
                self.scalarDict["sc_loss"] = tf.reduce_mean(self.scObj.loss)
                self.scalarDict["sc_nnz"] = tf.reduce_mean(self.scObj.nnz)
                if (self.params.image_shape is not None):
                    reshape_recon = tf.reshape(self.scObj.recon, (
                        self.params.num_classes,
                        self.params.batch_size,
                    ) + self.params.image_shape)
                    for i in range(self.params.num_classes):
                        self.imageDict["recon_class_" +
                                       str(i)] = reshape_recon[i, ...]
                else:
                    self.varDict["recon"] = self.scObj.recon

            with tf.name_scope("feedforward"):
                #Same input for every class: [num_classes, batch, features]
                tile_input = tf.tile(self.norm_input[tf.newaxis, :, :],
                                     [self.params.num_classes, 1, 1])
                feed_forward = tf.matmul(tile_input, self.W, transpose_b=True)
                #Taking inner product of feed_forward with sc_activation
                #i.e., diag(matmul(feed_forward, sc_activation))
                feed_forward = tf.reduce_sum(feed_forward * sc_activation,
                                             axis=2)
                #Predicted class is the one with the highest score
                self.est_labels = tf.argmax(feed_forward, axis=0)

                self.varDict["feed_forward"] = feed_forward
                self.varDict["est_labels"] = self.est_labels

            with tf.variable_scope('accuracy'):
                #Calculate accuracy
                #injectBool/injectAcc allow feeding an externally computed
                #accuracy instead of the one calculated in the graph
                self.injectBool = tf.placeholder_with_default(
                    False, shape=(), name="injectBool")
                self.injectAcc = tf.placeholder_with_default(0.0,
                                                             shape=None,
                                                             name="injectAcc")
                calc_accuracy = tf.reduce_mean(
                    tf.cast(tf.equal(self.est_labels, self.labels),
                            tf.float32))

                accuracy = tf.cond(self.injectBool, lambda: self.injectAcc,
                                   lambda: calc_accuracy)
                self.scalarDict["accuracy"] = accuracy

            with tf.name_scope("loss"):
                #Logistic loss log(1 + exp(-y * f)) per class and example.
                #softplus computes the same value but does not overflow to
                #inf (and NaN gradients) for large arguments.
                logistic_loss = tf.nn.softplus(-onehot_labels * feed_forward)
                supervised_loss = tf.reduce_sum(
                    logistic_loss,
                    axis=1) + (self.params.weight_decay / 2) * tf.norm(
                        self.W, axis=[1, 2])
                self.scalarDict["supervised_loss"] = tf.reduce_mean(
                    supervised_loss)

            with tf.name_scope("opt"):
                #Per-class D * D^T plus l2_weight on the diagonal
                D_covar = tf.matmul(
                    self.D, self.D,
                    transpose_b=True) + self.params.l2_weight * tf.eye(
                        self.params.dict_size,
                        batch_shape=[self.params.num_classes])
                #Calculate supervised gradients
                [sup_grad_wrt_a,
                 sup_grad_wrt_W] = tf.gradients(supervised_loss,
                                                [sc_activation, self.W])

                #D_covar^-1 * gradient
                sup_grad_wrt_a = tf.transpose(sup_grad_wrt_a, [0, 2, 1])
                beta = tf.matrix_solve(D_covar, sup_grad_wrt_a)

                #compute learning rate
                train_step = tf.Variable(0, name='train_step', dtype=tf.int64)
                #Updates the tf train_step with the global timestep of the object
                update_timestep = tf.assign_add(train_step, 1)
                #start_lr until train_step exceeds decay_time, then decays
                #as decay_time / train_step
                lr = tf.minimum(
                    self.params.start_lr,
                    self.params.start_lr *
                    (self.params.decay_time / tf.cast(train_step, tf.float32)))

                #Update W
                #Note that the paper adds weight decay on W, but this is encompassed into the gradient wrt W
                self.update_W = tf.assign_add(self.W, -lr * sup_grad_wrt_W)
                D_grad_term_1 = tf.matmul(sc_activation,
                                          tf.matmul(beta,
                                                    -self.D,
                                                    transpose_a=True),
                                          transpose_a=True)
                D_grad_term_2 = tf.matmul(beta,
                                          (tile_input - self.scObj.recon))
                self.update_D = tf.assign_add(
                    self.D, -lr * (D_grad_term_1 + D_grad_term_2))

                #Normalize D
                norm_D = tf.norm(self.D, axis=2, keepdims=True)
                #Only normalize if norm > 1, i.e., l2 dict element always <= 1
                norm_D = tf.maximum(tf.ones(D_shape), norm_D)
                #Normalize after update
                #with tf.control_dependencies([self.update_D]):
                self.normalize_D = self.D.assign(self.D / norm_D)

                #Group all update ops
                #Always make sure the tf timestep is in sync with global timestep
                #NOTE(review): only the group op is created under this
                #control dependency; update_W/update_D are created above -
                #confirm their ordering relative to update_timestep is as
                #intended
                with tf.control_dependencies([update_timestep]):
                    self.update_step = tf.group(self.update_W, self.update_D)

                self.scalarDict["learning_rate"] = lr
コード例 #10
0
ファイル: cnn.py プロジェクト: nghuyong/ZhihuLabelsPrediction
    def cnn_layer(self, X_inputs, n_step):
        """
        TextCNN encoder: for every filter size, two stacked convolutions
        followed by max pooling over the remaining sequence axis.
        Args:
           X_inputs: tensor.shape=(batch_size, n_step)
           n_step: sequence length, used to size the pooling window
        Returns:
           tensor.shape=(batch_size, self.n_filter_total)
        """
        n_filter = self.settings.n_filter
        embedding_dim = self.settings.embedding_dim
        unit_strides = [1, 1, 1, 1]

        def conv_block(scope_name, tensor, kernel_shape):
            # One VALID convolution inside its own variable scope.
            with tf.variable_scope(scope_name):
                kernel = weight_variable(shape=kernel_shape, name='W_filter')
                # The bias variable is created but not applied to the
                # convolution output (the batch-norm code that would have
                # consumed it as an offset is disabled).
                bias_variable(shape=[n_filter], name='beta_filter')
                return tf.nn.conv2d(tensor,
                                    kernel,
                                    strides=unit_strides,
                                    padding="VALID",
                                    name="conv")

        # Look up embeddings and add a trailing channel axis for conv2d.
        embedded = tf.expand_dims(
            tf.nn.embedding_lookup(self.embedding, X_inputs), -1)

        pooled_per_filter = []
        for filter_size in self.settings.filter_sizes:
            # First convolution spans embedding_dim along the width axis.
            first_conv = conv_block("conv1%s" % filter_size, embedded,
                                    [filter_size, embedding_dim, 1, n_filter])
            # Nonlinearity is applied outside the variable scope.
            hidden = tf.nn.relu(first_conv, name="relu")

            # Second convolution runs along the sequence axis only.
            second_conv = conv_block("conv2%s" % filter_size, hidden,
                                     [filter_size, 1, n_filter, n_filter])
            hidden = tf.nn.relu(second_conv, name="relu")

            # Two VALID convolutions shorten the sequence by
            # 2 * (filter_size - 1); pool over everything that is left.
            pool_window = [1, n_step - filter_size * 2 + 2, 1, 1]
            pooled_per_filter.append(
                tf.nn.max_pool(hidden,
                               ksize=pool_window,
                               strides=unit_strides,
                               padding='VALID',
                               name="pool"))

        # Concatenate along the channel axis and flatten.
        merged = tf.concat(pooled_per_filter, 3)
        return tf.reshape(merged, [-1, self.n_filter_total])
コード例 #11
0
    def _encode_sequence(self, embedded, seq_len, prefix, dropout_rate):
        """Run one input field through position embedding, dropout and blocks.

        Args:
            embedded: token embeddings of one input field; the first axis is
                treated as the batch axis and axis 1 is summed at the end.
            seq_len: sequence length of that field (``settings.title_len`` or
                ``settings.detail_len``).
            prefix: variable-scope prefix, ``"title"`` or ``"description"``.
            dropout_rate: scalar tensor holding the fraction of units to drop.

        Returns:
            A tuple ``(block_output, summed)``: the output of the last
            attention/feed-forward block and its sum over axis 1.
        """
        # Tile the position ids to the batch size seen at run time. The input
        # placeholders have a ``None`` batch dimension, so tiling to the fixed
        # ``settings.batch_size`` broke any batch of a different size.
        batch_size = tf.shape(embedded)[0]
        position_ids = tf.tile(
            tf.expand_dims(tf.range(seq_len), 0), [batch_size, 1])
        embedded = embedded + embedding(
            position_ids,
            vocab_size=seq_len,
            num_units=self.settings.embedding_dim,
            zero_pad=False,
            scale=False,
            scope=f"{prefix}_position_embedding")[0]

        # Dropout on the summed token + position embeddings.
        embedded = tf.layers.dropout(
            embedded,
            rate=dropout_rate,
            training=tf.convert_to_tensor(self.is_training))

        # Stack of self-attention + feed-forward blocks; queries and keys are
        # the same tensor.
        for i in range(self.settings.num_blocks):
            with tf.variable_scope(f"{prefix}_num_blocks_{i}"):
                embedded = multihead_attention(
                    queries=embedded,
                    keys=embedded,
                    num_units=self.settings.hidden_dim,
                    num_heads=self.settings.num_heads,
                    dropout_rate=dropout_rate,
                    is_training=self.is_training,
                    causality=False)
                embedded = feedforward(
                    embedded,
                    num_units=[
                        4 * self.settings.hidden_dim, self.settings.hidden_dim
                    ])

        # Collapse axis 1 by summation to get one vector per example.
        return embedded, tf.reduce_sum(embedded, axis=1)

    def __init__(self):
        """Build the TensorFlow graph of the two-input transformer classifier.

        The title and detail token sequences are embedded with one shared
        lookup table, each is encoded by its own stack of
        ``settings.num_blocks`` attention blocks and summed over axis 1, and
        the two vectors are concatenated and passed through a ReLU layer and
        a linear output layer. The loss is the mean sigmoid cross-entropy
        against ``class_input`` and is minimised with Adam.

        Placeholders created: ``title_input``, ``detail_input``,
        ``class_input`` and ``keep_prob``.
        """
        self.model_name = 'transformer'
        self.settings = TransformerSetting()
        self.max_f1 = 0.0
        # NOTE(review): this is a Python constant converted into the graph, so
        # the dropout layers built here are always in training mode. Confirm
        # whether an inference-time switch is needed.
        self.is_training = True

        with tf.name_scope('Inputs'):
            self.title_input = tf.placeholder(tf.int64,
                                              [None, self.settings.title_len],
                                              name='title_inputs')
            self.detail_input = tf.placeholder(
                tf.int64, [None, self.settings.detail_len],
                name='detail_inputs')
            self.class_input = tf.placeholder(tf.float32,
                                              [None, self.settings.class_num],
                                              name='class_input')
            self.keep_prob = tf.placeholder(tf.float32, [])

        # ``tf.layers.dropout`` takes the fraction of units to DROP, while the
        # placeholder holds the fraction to KEEP. Convert once here instead of
        # passing the keep probability as the rate.
        dropout_rate = 1.0 - self.keep_prob

        # =========== title encoder ===========
        # Token embedding; the lookup table is kept so the description
        # encoder can share it.
        title_tokens, self.lookup_table = embedding(
            self.title_input,
            vocab_size=self.settings.voc_size,
            num_units=self.settings.embedding_dim,
            scale=True,
            scope="title_embedding")
        self.title_embedded, self.title_encoder = self._encode_sequence(
            title_tokens, self.settings.title_len, "title", dropout_rate)

        # =========== description encoder ===========
        # Reuse the title lookup table and scale by sqrt(embedding_dim)
        # (presumably what ``scale=True`` does for the title; verify against
        # ``embedding``).
        description_tokens = tf.nn.embedding_lookup(
            self.lookup_table,
            self.detail_input) * (self.settings.embedding_dim**0.5)
        self.description_embedded, self.description_encoder = (
            self._encode_sequence(description_tokens,
                                  self.settings.detail_len, "description",
                                  dropout_rate))

        # Fully connected layer over the concatenated encodings (no bias).
        with tf.variable_scope('fc'):
            concat_output = tf.concat(
                [self.title_encoder, self.description_encoder], axis=1)
            W_fc = weight_variable(
                [self.settings.hidden_dim * 2, self.settings.fc_hidden_dim],
                name='Weight_fc')
            fc_output = tf.matmul(concat_output, W_fc, name='h_fc')
            fc_relu = tf.nn.relu(fc_output, name="relu")

        # Output layer: per-class logits and their sigmoid.
        with tf.variable_scope('output'):
            W_out = weight_variable(
                [self.settings.fc_hidden_dim, self.settings.class_num],
                name='Weight_out')
            b_out = bias_variable([self.settings.class_num], name='bias_out')
            self.y_pred = tf.nn.xw_plus_b(fc_relu,
                                          W_out,
                                          b_out,
                                          name='y_pred')
            self.sigmoid_y_pred = tf.nn.sigmoid(self.y_pred)

        # Loss: mean sigmoid cross-entropy over all examples and classes.
        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.y_pred, labels=self.class_input))

        # Training op: Adam, incrementing ``global_step`` on every update.
        with tf.variable_scope('training_ops'):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.settings.lr,
                beta1=0.9,
                beta2=0.98,
                epsilon=1e-8)
            self.train_op = self.optimizer.minimize(
                self.loss, global_step=self.global_step)

        # NOTE(review): the saver is named 'cnn' although this is the
        # transformer model; left unchanged so graph op names stay the same.
        self.saver = tf.train.Saver(max_to_keep=1, name='cnn')

        print(f'{self.model_name} init finish')