Example #1
def model_fn(features, labels, mode, params):
    img = features['images']  # (image_n, h, w, c)
    cap = features['captions']  # (caption_n, depth)
    ass = features['assignments']  # (caption_n,)

    attn, sen = attention_fn(img, mode=mode, params=params)
    ctx = apply_attn(img=img, att=attn, sen=sen)  # (image_n, frames, c)
    decoder_ctx = tf.gather(ctx, ass, axis=0)  # (caption_n, frames, c)

    logits = decoder_fn(decoder_ctx, sen, cap, mode, params)  # (caption_n, depth, vocab)
    classes = tf.argmax(logits, axis=2)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "classes": classes
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    else:
        loss = tf.reduce_mean(cross_entropy(labels=cap, logits=logits))
        if params.l2 > 0:
            reg = apply_regularization(l2_regularizer(params.l2), tf.trainable_variables())
            tf.summary.scalar("regularization", reg)
            loss += reg
        if mode == tf.estimator.ModeKeys.TRAIN:
            lr = tf.train.exponential_decay(params.lr,
                                            decay_rate=params.decay_rate,
                                            decay_steps=params.decay_steps,
                                            global_step=tf.train.get_global_step(),
                                            name='learning_rate',
                                            staircase=False)
            tf.summary.scalar('learning_rate', lr)
            if params.optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate=lr)
            elif params.optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=params.momentum)
            elif params.optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, momentum=params.momentum)
            else:
                raise ValueError("Unknown optimizer: {}".format(params.optimizer))
            print("Trainable: {}".format(list(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))))
            train_op = optimizer.minimize(
                loss=loss,
                global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
        else:
            eval_metric_ops = {}
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
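
Most examples in this collection build their penalty the same way as Example #1: l2_regularizer(scale) maps a tensor w to scale * tf.nn.l2_loss(w), i.e. scale * sum(w**2) / 2, and apply_regularization sums that over a list of variables (and records the result in the REGULARIZATION_LOSSES collection). A minimal, self-contained sketch of the pattern (TF 1.x; the shapes and the 0.01 scale are made up):

import tensorflow as tf
from tensorflow.contrib.layers import apply_regularization, l2_regularizer

w = tf.get_variable("w", shape=[128, 10])
b = tf.get_variable("b", shape=[10])

# Summed L2 penalty over the listed variables: 0.01 * (||w||^2 + ||b||^2) / 2
reg = apply_regularization(l2_regularizer(0.01), [w, b])

data_loss = tf.constant(0.0)   # stand-in for the cross-entropy term above
total_loss = data_loss + reg   # same "loss += reg" pattern as Example #1
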
Example #2
  def loss(self, data, labels):
    """The loss to minimize while training."""

    if self.is_regression:
      diff = self.training_inference_graph(data) - math_ops.to_float(labels)
      mean_squared_error = math_ops.reduce_mean(diff * diff)
      root_mean_squared_error = math_ops.sqrt(mean_squared_error, name="loss")
      loss = root_mean_squared_error
    else:
      loss = math_ops.reduce_mean(
          nn_ops.sparse_softmax_cross_entropy_with_logits(
              self.training_inference_graph(data),
              array_ops.squeeze(math_ops.to_int32(labels))),
          name="loss")
    if self.regularizer:
      loss += layers.apply_regularization(self.regularizer,
                                          variables.trainable_variables())
    return loss
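
Examples #1 and #2 regularize every trainable variable, which also penalizes bias terms. A common variant (a sketch, not taken from either repository, and assuming biases carry "bias" in their names) filters the list before calling apply_regularization:

import tensorflow as tf
from tensorflow.contrib.layers import apply_regularization, l2_regularizer

weight_vars = [v for v in tf.trainable_variables()
               if 'bias' not in v.name.lower()]
reg = apply_regularization(l2_regularizer(0.01), weight_vars)
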
Example #3
    def _build_model(self):
        self.add_placeholder()
        # short term memory
        with tf.variable_scope("short_term"):
            conv = self.conv1d(self.input_x,
                               self.config.kernel_sizes,
                               self.config.num_filters,
                               scope="short_term")
            gru_outputs = self.gru(conv, scope="short_gru")  # [b, t, d]
            context = self.temporal_attention(gru_outputs)  # [b, d]
            last_hidden_states = gru_outputs[:, -1, :]  # [b, d]
            linear_inputs = tf.concat([context, last_hidden_states], axis=1)

        # linear_inputs [b, 2d] -> [b, nfeatures, -1]
        linear_inputs = tf.stack(tf.split(linear_inputs,
                                          self.config.nfeatures,
                                          axis=1),
                                 axis=1)
        # logits [b, nfeatures, -1] -> [b, nfeatures, nbins]
        logits = tf.layers.dense(
            linear_inputs,
            self.config.nbins,
            activation=None,
            use_bias=True,
            kernel_regularizer=self.regularizer,
            kernel_initializer=layers.xavier_initializer())
        # get predictions
        self.predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
        weights = tf.ones(tf.shape(self.targets))
        self.loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=self.targets,
                                                     weights=weights)

        self.acc = tf.reduce_mean(
            tf.cast(tf.equal(self.predictions, self.targets),
                    dtype=tf.float32))

        if self.config.l2_lambda > 0:
            reg_vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            reg_term = layers.apply_regularization(self.regularizer, reg_vars)
            self.loss += reg_term

        self.add_train_op()
        self.initialize_session()
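
A note on Example #3 (Examples #4, #9 and #10 below use the same pattern): because the dense layer is created with kernel_regularizer=self.regularizer, TensorFlow already places per-weight penalty tensors into tf.GraphKeys.REGULARIZATION_LOSSES, so the collection holds scalar losses rather than weight variables. A more common way to fold them into the loss (a sketch, not part of the original code) is to sum the collection directly:

import tensorflow as tf

reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
reg_term = tf.add_n(reg_losses) if reg_losses else tf.constant(0.0)
# equivalently: reg_term = tf.losses.get_regularization_loss()
# then, as above: self.loss += reg_term
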
Example #4
    def _build_model(self):
        self.add_placeholder()
        # short term memory
        with tf.variable_scope("short_term"):
            conv = self.conv1d(self.input_x,
                               self.config.kernel_sizes,
                               self.config.num_filters,
                               scope="short_term")
            gru_outputs = self.gru(conv, scope="short_gru")  # [b, t, d]
            context = self.temporal_attention(gru_outputs)  # [b, d]
            last_hidden_states = gru_outputs[:, -1, :]  # [b, d]
            linear_inputs = tf.concat([context, last_hidden_states], axis=1)

        # prediction and loss
        predictions = tf.layers.dense(
            linear_inputs,
            self.config.nfeatures,
            activation=tf.nn.tanh,
            use_bias=True,
            kernel_regularizer=self.regularizer,
            kernel_initializer=layers.xavier_initializer())
        # get auto-regression and add it to prediction from NN
        ar, ar_loss = self.auto_regressive(self.input_x, self.config.ar_lambda)
        self.predictions = predictions + ar
        self.loss = tf.losses.mean_squared_error(labels=self.targets,
                                                 predictions=self.predictions)

        error = tf.reduce_sum((self.targets - self.predictions)**2)**0.5
        denom = tf.reduce_sum(
            (self.targets - tf.reduce_mean(self.targets))**2)**0.5
        self.rse = error / denom
        self.mae = tf.reduce_mean(tf.abs(self.targets - self.predictions))
        self.mape = tf.reduce_mean(
            tf.abs((self.targets - self.predictions) / self.targets))

        if self.config.l2_lambda > 0:
            reg_vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            reg_term = layers.apply_regularization(self.regularizer, reg_vars)
            self.loss += reg_term
        self.loss += ar_loss

        self.add_train_op()
        self.initialize_session()
Example #5
    def build_graph(self):
        self.construct_weights()
        saver, logits = self.forward_pass()
        log_softmax_var = tf.nn.log_softmax(logits)
        # per-user average negative log-likelihood
        neg_ll = -tf.reduce_mean(
            tf.reduce_sum(log_softmax_var * self.input_ph, axis=1))
        # apply regularization to weights
        reg = l2_regularizer(self.lam)
        reg_var = apply_regularization(reg, self.weights)
        # TensorFlow's L2 regularizer multiplies the squared L2 norm by 0.5,
        # so multiply by 2 to bring the penalty back to the original scale
        loss = neg_ll + 2 * reg_var

        train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)

        # add summary statistics
        tf.summary.scalar('negative_multi_ll', neg_ll)
        tf.summary.scalar('loss', loss)
        merged = tf.summary.merge_all()
        return saver, logits, loss, train_op, merged
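
The 2 * reg_var in Example #5 (and in Examples #6 and #7 below) undoes the 1/2 factor that TensorFlow's L2 regularizer applies. A quick numerical check of that comment (a sketch):

import tensorflow as tf
from tensorflow.contrib.layers import l2_regularizer

w = tf.constant([3.0, 4.0])
half_scaled = l2_regularizer(0.1)(w)   # 0.1 * (3**2 + 4**2) / 2 = 1.25
full_scaled = 2 * half_scaled          # 0.1 * (3**2 + 4**2)     = 2.5
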
Example #6
    def build_graph(self):
        self._construct_weights()

        saver, logits, KL = self.forward_pass()
        log_softmax_var = tf.nn.log_softmax(logits)

        neg_ll = -tf.reduce_mean(
            tf.reduce_sum(log_softmax_var * self.input_ph, axis=-1))
        # apply regularization to weights
        reg = l2_regularizer(self.lam)

        reg_var = apply_regularization(reg, self.weights_q + self.weights_p)
        # TensorFlow's L2 regularizer multiplies the squared L2 norm by 0.5,
        # so multiply by 2 to bring the penalty back to the original scale
        neg_ELBO = neg_ll + self.anneal_ph * KL + 2 * reg_var

        train_op = tf.train.AdamOptimizer(self.lr).minimize(neg_ELBO)

        # add summary statistics
        tf.summary.scalar('negative_multi_ll', neg_ll)
        tf.summary.scalar('KL', KL)
        tf.summary.scalar('neg_ELBO_train', neg_ELBO)
        merged = tf.summary.merge_all()

        params = []

        for elem in self.weights_q:
            params.append(elem)

        for elem in self.weights_p:
            params.append(elem)

        for elem in self.biases_q:
            params.append(elem)

        for elem in self.biases_p:
            params.append(elem)

        return tf.nn.softmax(logits), neg_ELBO, params
Example #7
    def build_graph(self):

        mu_q, std_q, KL = self.q_graph()

        sampled_z = mu_q + self.is_training_ph * \
                    self.epsilon * std_q

        # p-network
        logits = self.p_graph(sampled_z)
        '''
        neg_ll = -tf.reduce_sum(self.input_ph * tf.log(logits) +
        (1 - self.input_ph) * tf.log(1 - logits), 1)
        '''
        neg_ll = tf.reduce_sum(
            tf.square(tf.nn.l2_normalize(self.input_ph, 1) - logits), 1)

        # apply regularization to weights
        reg = l2_regularizer(self.lam)

        neg_reward = self.r_graph(sampled_z)

        reg_var = apply_regularization(
            reg, self.weights_q + self.weights_p + self.weights_r)
        # TensorFlow's L2 regularizer multiplies the squared L2 norm by 0.5,
        # so multiply by 2 to bring the penalty back to the original scale
        neg_ELBO = neg_ll + self.anneal_ph * KL + 2 * reg_var - tf.multiply(
            self.reward, neg_reward)

        train_op = tf.train.AdamOptimizer(self.lr).minimize(neg_ELBO)

        # add summary statistics
        tf.summary.scalar('negative_multi_ll', neg_ll)
        tf.summary.scalar('KL', KL)
        tf.summary.scalar('neg_ELBO_train', neg_ELBO)
        merged = tf.summary.merge_all()

        return logits, neg_ELBO, train_op, merged, sampled_z
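
The is_training_ph multiplier in Example #7 is the usual reparameterization switch: with the placeholder fed 1.0 during training, sampled_z is a stochastic sample; fed 0.0 at evaluation time, it collapses to the posterior mean mu_q. A toy sketch of the same gating (shapes and values are made up):

import tensorflow as tf

mu_q = tf.zeros([4, 8])
std_q = tf.ones([4, 8])
epsilon = tf.random_normal(tf.shape(mu_q))
is_training_ph = tf.placeholder_with_default(1.0, shape=[])  # feed 0.0 at eval
sampled_z = mu_q + is_training_ph * epsilon * std_q
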
Example #8
    def test(self, test_list, modelpath):
        with self.graph.as_default():
            c3d_net = [
                ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
                ["maxpool", "pool1", [1, 1, 2, 2, 1]],
                ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
                ["maxpool", "pool2", [1, 2, 2, 2, 1]],
                ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
                ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
                ["maxpool", "pool3", [1, 2, 2, 2, 1]],
                ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
                ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
                ["maxpool", "pool4", [1, 2, 2, 2, 1]],
                ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
                ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
                ["maxpool", "pool5", [1, 2, 2, 2, 1]],
           #     ["transpose", [0, 1, 4, 2, 3]],  #only use it if you restore the sports1m_finetuning_ucf101.model, otherwise uncomment it,(e.g use conv3d_deepnetA_sport1m_iter_1900000_TF.model)
                ["reshape", [-1, 8192]],
                ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
                ["dropout", "dropout1", self.keep_prob],
                ["fc", "fc2", [4096, 4096],'wd2','bd2', True],
                ["dropout", "dropout2", self.keep_prob],
                ["fc", "fc3", [4096, self.num_class],'wout','bout',False],
            ]

            # print(tf.trainable_variables())
            # print(var_list)
            # print(tf.get_collection(tf.GraphKeys.WEIGHTS))

            # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.5)
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.gpu_options.per_process_gpu_memory_fraction = 0.9

            with tf.Session(config=config, graph=self.graph) as sess:
                logits = self.parseNet(self.inputs, c3d_net)
                softmax_logits = tf.nn.softmax(logits)
                # int_label = tf.one_hot(self.labels, self.num_class)
                int_label = self.labels  # [bs,101]-->[bs*4 or 8 or 16,101]
                # int_label=tf.concat(
                #     [int_label,int_label,int_label,int_label,],axis=0)

                # int_label=tf.cast(int_label,dtype=tf.int64)
                task_loss = tf.reduce_sum(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=int_label))
                # task_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = int_label))
                # task_loss = -tf.reduce_sum(int_label*tf.log(logits))
                acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(softmax_logits, axis=-1), int_label), tf.float32))
                right_count = tf.reduce_sum(tf.cast(tf.equal(tf.argmax(softmax_logits, axis=1), int_label), tf.int32))
    
                reg_loss = layers.apply_regularization(layers.l2_regularizer(5e-4),
                                                       tf.get_collection(tf.GraphKeys.WEIGHTS))
                total_loss = task_loss + reg_loss
                # train_var_list = [v for v in tf.trainable_variables() if v.name.find("conv") == -1]
                train_op = tf.train.GradientDescentOptimizer(self.lr).minimize(
                    total_loss, global_step=self.global_step)
                # train_op = tf.train.MomentumOptimizer(self.lr,0.9).minimize(
                #     total_loss, global_step = self.global_step,var_list=train_var_list)
    
    
                total_para = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
                print('total_para:', total_para)  # all CDC9 :28613120  #pool5 27655936

                # train clip:762960
                # test  clip:302640
                init = tf.global_variables_initializer()
                # var_list = [v for v in tf.trainable_variables() if v.name.find("conv") != -1]  # load only the convolutional-layer parameters at initialization
                # print(var_list)
                sess.run(init)
                checkpath = modelpath + 'models'
                ckpt = tf.train.get_checkpoint_state(checkpath)
                if ckpt and ckpt.model_checkpoint_path:
                    print('Checkpoint found, restoring...')
                    saver = tf.train.Saver()
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    print('Restore complete.')
                else:
                    print(ckpt)



                #saver = tf.train.import_meta_graph('./'+modelpath+'models/c3d_ucf_model-4999.meta')
                #saver = tf.train.Saver(tf.trainable_variables())
                #saver.restore(sess,tf.train.latest_checkpoint('./'+modelpath+'models'))
                #saver.restore(sess,modelpath+'models/c3d_ucf_model-4999')

#                sess.run(init)
#                saver = tf.train.Saver(tf.trainable_variables())
#                saver.restore(sess, modelpath + "c3d_ucf101_finetune_whole_iter_20000_TF.model")
                print("Model Loading Done!")
                step = 0
                print_freq = 2
                next_start_pos = 0
                for one_epoch in range(1):
                    epostarttime = time.time()
                    starttime = time.time()
                    total_v = 0.0
                    test_correct_num = 0
                    for i in tqdm(range(int(3783 / self.batch_size))):
                        step += 1
                        total_v += self.batch_size
                        train_batch, label_batch, next_start_pos, _, _ = read_clip_and_label(
                            filename=test_list,
                            batch_size=self.batch_size,
                            num_frames_per_clip=self.CLIP_LENGTH,
                            height=self.IMG_HEIGHT,
                            width=self.IMG_WIDTH,
                            start_pos=next_start_pos,
                            shuffle=False
                        )
                        assert len(train_batch)==self.batch_size
                        train_batch = train_aug(train_batch, is_train=False, Crop_heith=self.CROP_HEIGHT,
                                                Crop_width=self.CROP_WIDTH,norm=True)
                        val_feed = {self.inputs: train_batch, self.labels: label_batch}
                        test_correct_num += sess.run(right_count, val_feed)
                        print('test acc:', test_correct_num / total_v, 'test_correct_num:', test_correct_num,
                              'total_v:', total_v)
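
Example #8 regularizes tf.get_collection(tf.GraphKeys.WEIGHTS) rather than the trainable variables. That collection is only populated if the weight-building code adds kernels to it explicitly (presumably inside parseNet); a sketch of what such a registration typically looks like (the variable name is only illustrative):

import tensorflow as tf

w = tf.get_variable('wc1', shape=[3, 3, 3, 3, 64])
tf.add_to_collection(tf.GraphKeys.WEIGHTS, w)
# later: apply_regularization(l2_regularizer(5e-4), tf.get_collection(tf.GraphKeys.WEIGHTS))
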
Example #9
    def build_model(self):
        with tf.variable_scope("Input_Embedding_Layer"):
            with tf.variable_scope('Char_Conv', reuse=tf.AUTO_REUSE):
                ch_emb = tf.nn.dropout(
                    tf.nn.embedding_lookup(self.char_mat, self.contc_input),
                    1.0 - self.dropout_emb)
                qh_emb = tf.nn.dropout(
                    tf.nn.embedding_lookup(self.char_mat, self.quesc_input),
                    1.0 - self.dropout_emb)
                ch_emb = CharCNN(ch_emb, self.char_limit, self.char_dim,
                                 self.filters, self.c_maxlen)
                qh_emb = CharCNN(qh_emb, self.char_limit, self.char_dim,
                                 self.filters, self.q_maxlen)

            c_emb0 = tf.nn.dropout(
                tf.nn.embedding_lookup(self.word_mat, self.contw_input),
                1.0 - self.dropout_emb)
            q_emb0 = tf.nn.dropout(
                tf.nn.embedding_lookup(self.word_mat, self.quesw_input),
                1.0 - self.dropout_emb)

            # cove features
            if self.use_cove == 2:
                cove_cont_low, cove_cont_high = tf.unstack(tf.stop_gradient(
                    self.cove_model(c_emb0)),
                                                           axis=0)
                cove_ques_low, cove_ques_high = tf.unstack(tf.stop_gradient(
                    self.cove_model(q_emb0)),
                                                           axis=0)
            else:
                cove_cont_low, cove_cont_high = self.cove_cont_low, self.cove_cont_high
                cove_ques_low, cove_ques_high = self.cove_ques_low, self.cove_ques_high

            # pre alignment
            c2q_prealign = AttentionLayer(c_emb0, q_emb0, q_emb0, self.q_mask,
                                          self.filters, self.dropout_att)

            c_emb = tf.concat([c_emb0, ch_emb, cove_cont_low], axis=-1)
            q_emb = tf.concat([q_emb0, qh_emb, cove_ques_low], axis=-1)
            c_emb = tf.nn.dropout(c_emb, 1 - self.dropout)
            q_emb = tf.nn.dropout(q_emb, 1 - self.dropout)

            # FeedForward layer
            with tf.variable_scope('FeedForward_Layer'):
                c_emb = FeedForward(c_emb,
                                    self.filters,
                                    self.dropout,
                                    name='cont_ff')
                q_emb = FeedForward(q_emb,
                                    self.filters,
                                    self.dropout,
                                    name='ques_ff')

        with tf.variable_scope('Encoder_Layers'):
            with tf.variable_scope('Contextual_Encoder', reuse=tf.AUTO_REUSE):
                # context encode
                c_emb_low = tf.concat([c_emb, c2q_prealign, cove_cont_low],
                                      axis=-1)
                c_emb_low = BiLSTM(c_emb_low,
                                   filters=self.filters,
                                   name='cont_lstm_low',
                                   dropout=self.dropout_rnn)
                c_emb_high = tf.concat([c_emb_low, cove_cont_high], axis=-1)
                c_emb_high = BiLSTM(c_emb_high,
                                    filters=self.filters,
                                    name='cont_lstm_high',
                                    dropout=self.dropout_rnn)
                c_emb_high = tf.nn.dropout(c_emb_high, 1 - self.dropout)
                # question encode
                q_emb_low = tf.concat([q_emb, cove_ques_low], axis=-1)
                q_emb_low = BiLSTM(q_emb_low,
                                   filters=self.filters,
                                   name='ques_lstm_low',
                                   dropout=self.dropout_rnn)
                q_emb_high = tf.concat([q_emb_low, cove_ques_high], axis=-1)
                q_emb_high = BiLSTM(q_emb_high,
                                    filters=self.filters,
                                    name='ques_lstm_high',
                                    dropout=self.dropout_rnn)
                q_mem_hidden = BiLSTM(tf.concat([q_emb_low, q_emb_high],
                                                axis=-1),
                                      self.filters,
                                      self.dropout_rnn,
                                      name='ques_lstm_memory')

            # c2q encode
            with tf.variable_scope('C2Q_Attention_Encoder'):
                c_att_input = tf.concat(
                    [c_emb0, cove_cont_high, c_emb_low, c_emb_high], axis=-1)
                q_att_input = tf.concat(
                    [q_emb0, cove_ques_high, q_emb_low, q_emb_high], axis=-1)
                v_att_input = [q_emb_low, q_emb_high, q_mem_hidden]
                c2q_att_hidden = DeepAttentionLayers(c_att_input,
                                                     q_att_input,
                                                     v_att_input,
                                                     self.q_mask,
                                                     self.filters,
                                                     self.dropout_att,
                                                     name='C2Q_Attention')
                c_mem_hidden = BiLSTM(tf.concat(
                    [c2q_att_hidden, c_emb_low, c_emb_high], axis=-1),
                                      self.filters,
                                      self.dropout_rnn,
                                      name='cont_lstm_memory')
            # self attention
            with tf.variable_scope('Self_Attention_Encoder'):
                c_mem_input = tf.concat([
                    c2q_att_hidden, c_mem_hidden, c_emb_low, c_emb_high,
                    cove_cont_high, c_emb0
                ],
                                        axis=-1)
                c_self_hidden = AttentionLayer(c_mem_input, c_mem_input,
                                               c_mem_input, self.c_mask,
                                               self.filters, self.dropout_att)
                c_mem = BiLSTM(tf.concat([c_self_hidden, c_mem_hidden],
                                         axis=-1),
                               self.filters,
                               self.dropout_rnn,
                               name='cont_self_memory')
                q_mem = SumAttention(q_mem_hidden, self.q_mask,
                                     self.dropout_att)

        with tf.variable_scope('Point_Network'):
            start_scores, end_scores = SAN(c_mem,
                                           q_mem,
                                           self.c_mask,
                                           filters=self.filters * 2,
                                           hidden_size=self.filters * 2,
                                           num_turn=5,
                                           name='SAN',
                                           dropout=self.dropout)
            self.unanswer_bias = tf.get_variable(
                "unanswer_bias", [1], initializer=tf.zeros_initializer())
            self.unanswer_bias = tf.reshape(
                tf.tile(self.unanswer_bias, [self.un_size]), [-1, 1])
            start_scores = tf.concat((self.unanswer_bias, start_scores),
                                     axis=-1)
            end_scores = tf.concat((self.unanswer_bias, end_scores), axis=-1)
            c_sum = SumAttention(c_mem, self.c_mask, self.dropout_att)
            pred_score = Dense(tf.concat([c_sum, q_mem], axis=-1),
                               1,
                               norm=True,
                               dropout=self.dropout)

        with tf.variable_scope('Loss_Layer'):
            start_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=start_scores, labels=self.y_start)
            end_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=end_scores, labels=self.y_end)
            self.loss_a = tf.reduce_mean(start_loss + end_loss)
            answer_exist_label = tf.squeeze(tf.cast(
                tf.slice(self.y_start, [0, 0], [-1, 1]), tf.float32),
                                            axis=-1)
            self.loss_c = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=pred_score, labels=answer_exist_label))
            self.loss = self.loss_a + self.gamma * self.loss_c
            # l2 loss
            if self.l2_norm is not None:
                variables = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                l2_loss = apply_regularization(regularizer, variables)
                self.loss += self.l2_norm * tf.reduce_sum(l2_loss)

        with tf.variable_scope('Output_Layer'):
            unanswer_mask = tf.cast(tf.less(tf.nn.sigmoid(pred_score), 0.5),
                                    tf.int64)  # [bs,] has answer=1 no answer=0
            unanswer_move = unanswer_mask - 1  # [bs,] has answer=0 no answer=-1
            softmax_start_scores = tf.nn.softmax(
                tf.slice(start_scores, [0, 1], [-1, -1]))
            softmax_end_scores = tf.nn.softmax(
                tf.slice(end_scores, [0, 1], [-1, -1]))
            outer = tf.matmul(tf.expand_dims(softmax_start_scores, axis=2),
                              tf.expand_dims(softmax_end_scores, axis=1))
            outer = tf.matrix_band_part(outer, 0, self.ans_limit)

            def position_encoding(x):
                for i in range(x.shape[0]):
                    for j in range(x.shape[1]):
                        if j - i > 5:
                            x[i][j] = float(1.0 / math.log(j - i + 1))
                return x

            mask_mat = tf.ones((self.c_maxlen, self.c_maxlen))
            mask_mat = tf.expand_dims(tf.py_func(position_encoding, [mask_mat],
                                                 tf.float32),
                                      axis=0)
            mask_mat = tf.tile(mask_mat, [self.un_size, 1, 1])

            outer_masked = outer * mask_mat
            self.mask_output1 = tf.argmax(
                tf.reduce_max(outer_masked,
                              axis=2), axis=1) * unanswer_mask + unanswer_move
            self.mask_output2 = tf.argmax(
                tf.reduce_max(outer_masked,
                              axis=1), axis=1) * unanswer_mask + unanswer_move
Example #10
    def build_model(self):
        self._build_placeholder()

        data = None
        # Load word vocab and char vocab if we are using pretrained embedding
        if self.parameter['use_word_pretrained'] or self.parameter[
                'use_char_pretrained']:
            with open('necessary.pkl', 'rb') as f:
                data = pickle.load(f)

        self._build_word_and_char_embedding(data)

        # Look up each of the embeddings (word-level and character-level).
        self._embeddings = []
        self._embeddings.append(
            tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
        self._embeddings.append(
            tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

        # Compute the syllable (character)-level embedding.
        character_embedding = tf.reshape(self._embeddings[1], [
            -1, self.parameter["word_length"],
            self.parameter["embedding"][1][2]
        ])
        char_len = tf.reshape(self.character_len, [-1])

        # Dropout after embedding, before lstm layer
        if self.parameter["use_dropout_after_embedding"]:
            character_embedding = tf.nn.dropout(character_embedding,
                                                self.emb_dropout_keep_prob)

        character_emb_rnn = self._build_birnn_model(
            character_embedding,
            char_len,
            self.parameter["char_lstm_units"],
            self.lstm_dropout_keep_prob,
            last=True,
            scope="char_layer")

        if self.parameter["use_lm"]:
            lm_word_embedding = tf.nn.embedding_lookup(
                self._embedding_matrix[0], self.encoder_inputs)
            lm_char_embedding = tf.nn.embedding_lookup(
                self._embedding_matrix[1], self.encoder_input_chars)
            lm_char_embedding = tf.reshape(lm_char_embedding, [
                -1, self.parameter["word_length"],
                self.parameter["embedding"][1][2]
            ])
            lm_char_len = tf.reshape(self.encoder_char_len, [-1])
            lm_char_rnn = self._build_birnn_model(
                lm_char_embedding,
                lm_char_len,
                self.parameter["char_lstm_units"],
                self.lstm_dropout_keep_prob,
                last=True,
                scope="char_layer",
                lm=True)
            lm_all_emb = tf.concat(
                [self.lm_ne_dict, lm_word_embedding, lm_char_rnn], axis=2)
            if self.parameter["use_highway"]:
                lm_all_emb = self._build_highway(lm_all_emb,
                                                 self.parameter["num_layers"],
                                                 scope="highway")

        # Concatenate all of the embeddings computed above.
        all_data_emb = self.ne_dict
        for i in range(0, len(self._embeddings) - 1):
            all_data_emb = tf.concat([all_data_emb, self._embeddings[i]],
                                     axis=2)
        all_data_emb = tf.concat([all_data_emb, character_emb_rnn], axis=2)

        if self.parameter["use_highway"]:
            all_data_emb = self._build_highway(all_data_emb,
                                               self.parameter["num_layers"],
                                               scope="highway")

        # Dropout after embedding, before lstm layer
        if self.parameter["use_dropout_after_embedding"]:
            all_data_emb = tf.nn.dropout(all_data_emb,
                                         self.emb_dropout_keep_prob)

        output_lst = []
        # --------------------------------------- Add CONV Layer -------------------------------------------#
        # 1d depthwise-separable convolution
        if self.parameter["use_lm"]:
            conv_output = self._build_conv(lm_all_emb,
                                           self.parameter["kernel_sizes"],
                                           self.parameter["num_filters"],
                                           self.encoder_length,
                                           auto_regressive=True)
            self.lm_loss = self.lm_loss(conv_output,
                                        self.encoder_targets,
                                        NUM_VOCAB,
                                        self.encoder_length,
                                        scope="lm_loss")
            # remove go token
            conv_output = conv_output[:, 1:, :]
        else:
            conv_output = self._build_conv(all_data_emb,
                                           self.parameter["kernel_sizes"],
                                           self.parameter["num_filters"],
                                           self.sequence,
                                           auto_regressive=False)
        output_lst.append(conv_output)
        # --------------------------------------------------------------------------------------------------#

        # Run a Bi-RNN over all of the data.
        lstm_output = self._build_birnn_model(all_data_emb,
                                              self.sequence,
                                              self.parameter["lstm_units"],
                                              self.lstm_dropout_keep_prob,
                                              scope="all_data_layer")
        output_lst.append(lstm_output)

        # self attention
        if self.parameter["use_self_attention"]:
            aligned_outputs = self._attention(lstm_output,
                                              self.parameter["lstm_units"],
                                              self.parameter["num_heads"],
                                              self.sequence,
                                              scope="attention_small")
            output_lst.append(aligned_outputs)

        if len(output_lst) != 1:
            outputs = tf.concat(output_lst, axis=2)
        else:
            outputs = output_lst[0]

        outputs = tf.nn.dropout(outputs, self.dropout_rate)
        # [b, t, 3*d] -> [b, t, C]
        logits = self._build_dense_layer(outputs)

        # crf layer
        crf_cost = self._build_crf_layer(logits)
        if self.parameter["use_reg_loss"]:
            reg_vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            reg_term = layers.apply_regularization(self.regularizer, reg_vars)
            self.cost = crf_cost + reg_term
        else:
            self.cost = crf_cost

        if self.parameter["use_lm"]:
            self.cost += self.lm_loss * self.parameter["gamma"]

        self.train_op = self._build_output_layer(self.cost)

        # Exponential moving average
        if self.parameter["use_ema"]:
            var_ema = tf.train.ExponentialMovingAverage(
                decay=self.parameter["ema_decay_rate"])
            ema_op = var_ema.apply(tf.trainable_variables())
            with tf.control_dependencies([ema_op]):
                self.cost = tf.identity(self.cost)
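
In Example #10, var_ema.apply(...) only maintains shadow copies of the weights; the control dependency ties that update to every evaluation of self.cost. To actually use the averaged weights at test time, the usual companion step (a sketch, not shown in the original code) is a Saver built from the shadow-variable map:

# assumes the same var_ema object created above
ema_saver = tf.train.Saver(var_ema.variables_to_restore())
# ema_saver.restore(sess, checkpoint_path)  # loads the EMA weights into the live variables
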
Example #11
    def build(self, x, y, batch_size, learning_rate):

        # ==== conditional variational auto-encoder
        mu, sigma = self.encode(x)
        z = mu + sigma * tf.random_normal(tf.shape(mu))
        z_sample = tf.random_normal(tf.shape(mu))
        dis_z = self.discriminate(z)
        dis_z_sample = self.discriminate(z_sample)

        x_hat = self.decode(z)
        y_hat = self.classify(z)

        z_fake = tf.random_normal(tf.shape(mu))
        x_fake_hat = self.decode(z_fake)

        # ==== variables
        variables = dict()
        var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

        sizes = 0
        for v in var:
            sv = v.value().shape
            size = 1
            for s in sv:
                size *= int(s)

            sizes += size
        print('total variables:', sizes)

        var_enc = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._encoder_name)
        var_gen = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._generator_name)
        var_cla = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._classifier_name)
        var_dis = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._discriminator_name)

        variables['enc'] = var_enc
        variables['gen'] = var_gen
        variables['cla'] = var_cla
        variables['dis'] = var_dis

        # ==== define losses
        loss = dict()

        # Loss stage1: SAE
        loss['d_cla'] = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_hat))
        loss['d_KL'] = self.KL_divergence(mu, sigma)
        loss['g_rec'] = tf.reduce_sum(tf.square(x - x_hat)) / batch_size

        loss_reg = tcl.apply_regularization(tcl.l2_regularizer(1e-4), weights_list=var_enc + var_gen + var_cla)

        loss['SAE'] = loss['d_cla'] + loss['d_KL'] + loss['g_rec'] + loss_reg

        # Loss stage2: Gan
        loss['dis'] = tf.reduce_mean(0.5 * tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_z, labels=tf.ones_like(dis_z)) +
                                     0.5 * tf.nn.sigmoid_cross_entropy_with_logits(logits=dis_z_sample, labels=tf.zeros_like(dis_z_sample)))

        # ==== define optimizer
        optimizer = tf.train.AdamOptimizer
        optim = dict()

        # ---- Stage1: svae optimizer
        optim['SAE'] = optimizer(learning_rate=learning_rate, beta1=0.5).minimize(loss['SAE'], var_list=var_enc + var_gen + var_cla)

        # ---- Stage 2: gan optimizer
        optim['DIS'] = optimizer(learning_rate=learning_rate, beta1=0.5).minimize(loss['dis'], var_list=var_dis)

        # ==== define metrics
        metric = dict()

        # ---- svae metric
        metric['acc'] = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), y), tf.float32))
        metric['x_hat'] = x_hat
        metric['latent'] = mu
        metric['x_fake_hat'] = x_fake_hat

        # ---- gan metric
        metric['acc_dis_true'] = tf.reduce_mean(tf.cast(dis_z >= 0.5, tf.float32))
        metric['acc_dis_fake'] = tf.reduce_mean(tf.cast(dis_z_sample < 0.5, tf.float32))

        return loss, optim, metric, variables
Example #12
def model_fn(features, labels, mode, params):
    img = features['images']  # (image_n, h, w, c)

    temperature = get_temperature(params)
    img_attn, img_sen = attention_fn(img,
                                     temperature=temperature,
                                     mode=mode,
                                     params=params)

    if params.use_slot_vocab:
        img_ctx = apply_attn(
            img=img, att=img_attn)  # , sen=img_sen)  # (image_n, frames, c)
        slot_vocab = slot_vocab_fn(img_ctx=img_ctx,
                                   params=params)  # (image_n, frames, vocab+1)
        if img_sen is not None:
            slot_vocab *= tf.expand_dims(img_sen, axis=2)
    else:
        img_ctx = apply_attn(img=img, att=img_attn,
                             sen=img_sen)  # (image_n, frames, c)
        slot_vocab = None

    if mode == tf.estimator.ModeKeys.PREDICT:
        n = tf.shape(img_ctx)[0]
        if params.vae_dim > 0:
            enc = tf.random_normal(shape=(n, params.vae_dim),
                                   mean=0,
                                   stddev=1,
                                   dtype=tf.float32)
        else:
            enc = None
        logits, slot_attn, slot_sentinel, y1 = predict_decoder_fn(
            slot_vocab=slot_vocab,
            img_ctx=img_ctx,
            sen=img_sen,
            enc=enc,
            params=params,
            depth=30,
            temperature=temperature,
            mode=mode)
        predictions = {
            'captions': y1,
            'image_ids':
            tf.get_default_graph().get_tensor_by_name('image_ids:0'),
            'slot_attention': slot_attn,
            'slot_sentinel': slot_sentinel,
            'image_attention': img_attn
        }
        if slot_vocab is not None:
            predictions['slot_vocab'] = slot_vocab
        if img_sen is not None:
            predictions['image_sentinel'] = img_sen

        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    else:
        raw_cap = features['captions']  # (caption_n, depth)
        cap = tf.maximum(raw_cap - 1, 0)  # (caption_n, depth)
        cap_mask = 1. - tf.cast(tf.equal(raw_cap, 0),
                                tf.float32)  # (caption_n, depth)
        ass = features['assignments']  # (caption_n,)
        if slot_vocab is not None:
            decoder_vocab = tf.gather(slot_vocab, ass,
                                      axis=0)  # (caption_n, frames, c)
        else:
            decoder_vocab = None
        if img_sen is not None:
            decoder_sen = tf.gather(img_sen, ass,
                                    axis=0)  # (caption_n, frames)
        else:
            decoder_sen = None
        decoder_img_ctx = tf.gather(img_ctx, ass, axis=0)
        if params.vae_dim > 0:
            mu, raw_sig = encoder_fn(img_ctx=decoder_img_ctx,
                                     sen=decoder_sen,
                                     slot_vocab=decoder_vocab,
                                     mask=cap_mask,
                                     cap=cap,
                                     temperature=temperature,
                                     params=params,
                                     mode=mode)
            sigma = EPSILON + tf.exp(raw_sig)
            enc = mu + (sigma *
                        tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32))
            kl_loss = 0.5 * tf.reduce_sum(
                tf.square(mu) + tf.square(sigma) -
                tf.log(EPSILON + tf.square(sigma)) - 1, 1)
            kl_loss = tf.reduce_mean(kl_loss, 0)
            tf.summary.scalar('kl_loss', kl_loss)
        else:
            enc = None

        logits, slot_attn, slot_sentinel = train_decoder_fn(
            slot_vocab=decoder_vocab,
            img_ctx=decoder_img_ctx,
            sen=decoder_sen,
            cap=cap,
            temperature=temperature,
            params=params,
            mode=mode,
            enc=enc)

        # Loss
        if params.loss == 'cross_entropy':
            loss = tf.reduce_mean(
                cross_entropy_loss(labels=cap,
                                   mask=cap_mask,
                                   logits=logits,
                                   smoothing=params.smoothing))
        elif params.loss == 'nll':
            loss = tf.reduce_mean(
                nll_loss(labels=cap, mask=cap_mask, logits=logits, mean=False))
        else:
            raise ValueError("Unknown loss: {}".format(params.loss))

        if params.vae_dim > 0:
            kl_weight = get_kl_weight(params)
            loss += kl_weight * kl_loss

        # Regularization
        # slot_attn: (n, depth, frame_size)
        # slot_sentinel: (n, depth, 1)
        if params.l2 > 0:
            reg = apply_regularization(l2_regularizer(params.l2),
                                       tf.trainable_variables())
            tf.summary.scalar("regularization", reg)
            loss += reg
        if params.unity_reg > 0 and decoder_sen is not None:
            slot_sum = tf.reduce_sum(tf.expand_dims(cap_mask, 2) * slot_attn *
                                     slot_sentinel,
                                     axis=1)  # (n, frame_size)
            slot_diff = tf.square(slot_sum - decoder_sen)
            unity_regularization = params.unity_reg * tf.reduce_mean(
                tf.reduce_sum(slot_diff, 1))
            tf.summary.scalar("unity_regularization", unity_regularization)
            loss += unity_regularization
        if (params.img_sen_l1 > 0
                or params.img_sen_l2 > 0) and img_sen is not None:
            img_sen_reg = 0
            if params.img_sen_l1 > 0:
                img_sen_reg += params.img_sen_l1 * tf.reduce_mean(
                    tf.reduce_sum(img_sen, axis=1), axis=0)
            if params.img_sen_l2 > 0:
                img_sen_reg += params.img_sen_l2 * tf.reduce_mean(
                    tf.square(tf.reduce_sum(img_sen, axis=1)), axis=0)
            tf.summary.scalar('image_sentinel_regularization', img_sen_reg)
            loss += img_sen_reg

        if mode == tf.estimator.ModeKeys.TRAIN:
            lr = tf.train.exponential_decay(
                params.lr,
                decay_rate=params.decay_rate,
                decay_steps=params.decay_steps,
                global_step=tf.train.get_global_step(),
                name='learning_rate',
                staircase=False)
            tf.summary.scalar('learning_rate', lr)
            if params.optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate=lr)
            elif params.optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(
                    learning_rate=lr, momentum=params.momentum)
            elif params.optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate=lr,
                                                      momentum=params.momentum)
            else:
                raise ValueError("Unknown optimizer: {}".format(
                    params.optimizer))
            print("Trainable: {}".format(
                list(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))))
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
        else:
            eval_metric_ops = {}
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              eval_metric_ops=eval_metric_ops)
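
Example #12 scales the KL term by get_kl_weight(params), a helper that is not shown here. A minimal warm-up schedule it could plausibly correspond to (purely an assumption; kl_max and kl_anneal_steps are hypothetical fields on params):

import tensorflow as tf

def get_kl_weight(params):
    # Linearly anneal the KL weight from 0 to kl_max over kl_anneal_steps steps.
    step = tf.cast(tf.train.get_or_create_global_step(), tf.float32)
    return params.kl_max * tf.minimum(1.0, step / params.kl_anneal_steps)
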
Example #13
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])
    #print embeddings_s1.shape
    #print tf.boolean_mask(embeddings_s1, mask_placeholder_s1, axis=1).shape
    #embeddings = tf.concat([tf.reduce_mean(tf.boolean_mask(embeddings_s1, mask_placeholder_s1), axis=1), tf.reduce_mean(tf.boolean_mask(embeddings_s2, mask_placeholder_s2), axis=1)], 0)
    #print embeddings.shape

    dropout_rate = dropout_placeholder

    preds = []
    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    c = tf.zeros([tf.shape(embeddings_s1)[0], hidden_size])
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(cell1,
                                         embeddings_s1,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l1)
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(cell2,
                                         embeddings_s2,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l2)

    #START HERE, CHECK PREDS, DO BITMASK FOR LOSSES, MAKE SURE OPTIMIZING CORRECT FUNCTION
    func = xavier_weight_init()
    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    h_drop = tf.nn.dropout(state2.h, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1
    tf.add_to_collection('ops_to_restore', pred)
    #pred = tf.add(tf.matmul(h_drop, U), b1, name="pred")

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss
    #y = labels_placeholder
    #loss = tf.nn.l2_loss(y-preds)
    #loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    #train_op = optimizer.minimize(loss)

    #optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    gradients = optimizer.compute_gradients(loss)
    grads = [x[0] for x in gradients]
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    gradients = [(grads[i], gradients[i][1]) for i in range(len(grads))]
    train_op = optimizer.apply_gradients(gradients)
    return pred, loss, train_op
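
Example #13 is the only snippet that combines L1 and L2 penalties. Assuming l1_l2_regularizer here is tf.contrib.layers.l1_l2_regularizer, it returns a function computing l1_reg * sum(|w|) + l2_reg * sum(w**2) / 2, so the single apply_regularization call covers both terms. A small sketch:

import tensorflow as tf
from tensorflow.contrib.layers import l1_l2_regularizer

reg = l1_l2_regularizer(scale_l1=1e-5, scale_l2=1e-4)
w = tf.constant([[1.0, -2.0]])
penalty = reg(w)   # 1e-5 * (1 + 2) + 1e-4 * (1 + 4) / 2
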
Example #14
  def build_graph(self):
    """Builds the neural network graph."""

    # define graph
    self.g = tf.Graph()
    with self.g.as_default():

      # create and store a new session for the graph
      self.sess = tf.Session()

      # define placeholders
      self.x = tf.placeholder(shape=[None, self.dim_input], dtype=tf.float32)
      self.y = tf.placeholder(shape=[None, self.num_classes], dtype=tf.float32)

      # define simple model
      with tf.variable_scope('last_layer'):
        self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

      self.loss = tf.reduce_mean(
          tf.nn.softmax_cross_entropy_with_logits_v2(
              labels=self.y, logits=self.z))

      self.output_probs = tf.nn.softmax(self.z)

      # Variables of the last layer
      self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
      self.ll_vars_concat = tf.concat(
          [self.ll_vars[0],
           tf.expand_dims(self.ll_vars[1], axis=0)], 0)

      # Summary
      _variable_summaries(self.ll_vars_concat)

      # add regularization that acts as a unit Gaussian prior on the last layer
      regularizer = contrib_layers.l2_regularizer(1.0)

      # regularization
      prior = contrib_layers.apply_regularization(regularizer, self.ll_vars)
      self.bayesian_loss = self.n * self.loss + prior

      # saving the weights of last layer when running SGLD/SGD/MCMC algorithm
      self.saver = tf.train.Saver(var_list=self.ll_vars,
                                  max_to_keep=self.num_samples)

      # SGLD optimizer for the last layer
      if self.sampler in ['sgld', 'lmc']:
        step = self.step_size / self.n
        gd_opt = tf.train.GradientDescentOptimizer(step)
        grads_vars = gd_opt.compute_gradients(self.bayesian_loss)
        grads_vars_sgld = []

        for g, v in grads_vars:
          if g is not None:
            s = list(v.name)
            s[v.name.rindex(':')] = '_'
            # Adding Gaussian noise to the gradient
            gaussian_noise = (np.sqrt(2. / step)
                              * tf.random_normal(tf.shape(g)))
            g_sgld = g + gaussian_noise
            tf.summary.histogram(''.join(s) + '/grad_hist_mcmc',
                                 g / self.n)
            tf.summary.histogram(''.join(s) + '/gaussian_noise_hist_mcmc',
                                 gaussian_noise / self.n)
            tf.summary.histogram(''.join(s) + '/grad_total_hist_mcmc',
                                 g_sgld / self.n)
            grads_vars_sgld.append((g_sgld, v))

        self.train_op = gd_opt.apply_gradients(grads_vars_sgld)

      # SGD optimizer for the last layer
      if self.sampler == 'sgd':
        gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
        grads_vars_sgd = gd_opt.compute_gradients(self.loss)
        self.train_op = gd_opt.apply_gradients(grads_vars_sgd)

        for g, v in grads_vars_sgd:
          if g is not None:
            s = list(v.name)
            s[v.name.rindex(':')] = '_'
            tf.summary.histogram(''.join(s) + '/grad_hist_sgd', g)

      # Merge all the summaries and write them out
      self.all_summaries = tf.summary.merge_all()
      location = os.path.join(self.working_dir, 'logs')
      self.writer = tf.summary.FileWriter(location, graph=self.g)

      saver_network = tf.train.Saver(var_list=self.ll_vars)
      print('loading the network ...')
      # Restores from checkpoint
      # self.sess.run(tf.global_variables_initializer())
      saver_network.restore(self.sess, self.model_dir)
      print('Graph successfully loaded.')
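
The sqrt(2 / step) noise in Example #14 is what turns plain gradient descent into stochastic gradient Langevin dynamics: after apply_gradients multiplies by the step size, each parameter receives roughly -step * grad + sqrt(2 * step) * N(0, 1), i.e. Gaussian noise at the Langevin discretization scale. A sketch of the effective update (plain NumPy, purely illustrative):

import numpy as np

step = 1e-4                                   # plays the role of step_size / n above
grad = np.array([0.3, -0.1])
noise = np.sqrt(2.0 / step) * np.random.randn(2)
update = -step * (grad + noise)               # == -step * grad - sqrt(2 * step) * N(0, 1)
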
Example #15
def train():
  start_time_first = time.time()
  WORK_DIRECTORY = FLAGS.VIEW_PATH
  train_size, test_size, val_size = get_size()
  fqbt, rbt = init_bin_file('data/train.bin')
  fqbv, rbv = init_bin_file('data/val.bin')
  fqbe, rbe = init_bin_file('data/test.bin')

  data_node = tf.placeholder(tf.float32, shape=(None, FLAGS.NUM_SPEC, FLAGS.CHANNEL_NUMBER))
  labels_node = tf.placeholder(tf.int64, shape=(None, FLAGS.LABEL_NUMBER))
  keep_hidden = tf.placeholder(tf.float32)
  logits = model(data_node, keep_hidden)
  preds = tf.nn.softmax(logits, name='Output')

  tvs = [tv for tv in tf.trainable_variables()]
  count_trainable_params(tvs)

  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_node))
  loss += apply_regularization(l2_regularizer(5e-4), tf.trainable_variables())

  batch = tf.Variable(0, trainable=False)
  learning_rate = tf.train.exponential_decay(0.01, batch * FLAGS.BATCH_SIZE,
                                             train_size, 0.95, staircase=True)
  optimizer = tf.train.MomentumOptimizer(
      learning_rate, 0.9).minimize(loss, global_step=batch)
  eval_predictions = tf.nn.softmax(model(data_node, keep_hidden, reuse=True))

  train_label_node, train_data_node = get_train_data(fqbt, rbt)
  val_label_node, val_data_node = get_train_data(fqbv, rbv)
  test_label_node, test_data_node = get_train_data(fqbe, rbe)

  saver = tf.train.Saver(tf.global_variables())

  TRAIN_FREQUENCY = train_size // FLAGS.BATCH_SIZE * 2
  TEST_FREQUENCY = TRAIN_FREQUENCY
  VAL_FREQUENCY = TRAIN_FREQUENCY
  SAVE_FREQUENCY = 10 * train_size // FLAGS.BATCH_SIZE

  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    summary_writer = tf.summary.FileWriter(WORK_DIRECTORY, sess.graph)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
      while not coord.should_stop():
        start_time = time.time()
        for step in xrange(int(FLAGS.NUM_EPOCHS * train_size) // FLAGS.BATCH_SIZE):
          train_data, train_label = sess.run([train_data_node, train_label_node])
          feed_dict = {data_node: train_data,
                       labels_node: train_label, keep_hidden: 0.5}
          _, l, lr, pred = sess.run(
              [optimizer, loss, learning_rate, preds], feed_dict=feed_dict)
          if step != 0 and step % TRAIN_FREQUENCY == 0:
            et = time.time() - start_time
            print('Step %d (epoch %.2f), %.1f ms' %
                  (step, float(step) * FLAGS.BATCH_SIZE / train_size, 1000 * et / TRAIN_FREQUENCY))
            print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            print('Train accuracy: {:.3f}'.format(accuracy(pred, train_label)))
            start_time = time.time()
          if step != 0 and step % VAL_FREQUENCY == 0:
            val_label_total = np.zeros(
                (val_size // FLAGS.BATCH_SIZE * FLAGS.BATCH_SIZE, FLAGS.LABEL_NUMBER))
            prediction_total = np.zeros(
                (val_size // FLAGS.BATCH_SIZE * FLAGS.BATCH_SIZE, FLAGS.LABEL_NUMBER))
            for ti in xrange(val_size // FLAGS.BATCH_SIZE):
              offset = ti * FLAGS.BATCH_SIZE
              val_data, val_label = sess.run([val_data_node, val_label_node])
              predictions = eval_in_batches(
                  val_data, sess, eval_predictions, data_node, keep_hidden)
              prediction_total[offset:offset + FLAGS.BATCH_SIZE, :] = predictions
              val_label_total[offset:offset + FLAGS.BATCH_SIZE] = val_label
            acc = accuracy(prediction_total, val_label_total)
            print('Accuracy of validation: {:.3f}'.format(acc))
            start_time = time.time()
          if step != 0 and step % TEST_FREQUENCY == 0:
            test_label_total = np.zeros(
                (test_size // FLAGS.BATCH_SIZE * FLAGS.BATCH_SIZE, FLAGS.LABEL_NUMBER))
            prediction_total = np.zeros(
                (test_size // FLAGS.BATCH_SIZE * FLAGS.BATCH_SIZE, FLAGS.LABEL_NUMBER))
            for ti in xrange(test_size // FLAGS.BATCH_SIZE):
              offset = ti * FLAGS.BATCH_SIZE
              test_data, test_label = sess.run([test_data_node, test_label_node])
              predictions = eval_in_batches(
                  test_data, sess, eval_predictions, data_node, keep_hidden)
              prediction_total[offset:offset + FLAGS.BATCH_SIZE, :] = predictions
              test_label_total[offset:offset + FLAGS.BATCH_SIZE] = test_label
            acc = accuracy(prediction_total, test_label_total)
            print('Accuracy of test: {:.3f}'.format(acc))
            start_time = time.time()
          if step % SAVE_FREQUENCY == 0 and step != 0:
            if FLAGS.SAVE_MODEL:
              checkpoint_path = os.path.join(WORK_DIRECTORY, 'model.ckpt')
              saver.save(sess, checkpoint_path, global_step=step)
        else:
          if FLAGS.SAVE_MODEL:
            checkpoint_path = os.path.join(WORK_DIRECTORY, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
          coord.request_stop()
    except tf.errors.OutOfRangeError:
      print('Done training -- epoch limit reached')
    finally:
      pass
    coord.join(threads)
  print('All training process costs {:.2f} seconds...'.format(time.time() - start_time_first))
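For reference, a minimal, self-contained sketch of the pattern above (placeholder shapes and hyperparameters are illustrative, not taken from the snippet): the L2 penalty from l2_regularizer/apply_regularization is added to the task loss, and the learning rate decays against a non-trainable step counter that the optimizer advances.

import tensorflow as tf
from tensorflow.contrib.layers import apply_regularization, l2_regularizer

x = tf.placeholder(tf.float32, [None, 10])
y = tf.placeholder(tf.float32, [None, 2])
w = tf.get_variable('w', [10, 2])
logits = tf.matmul(x, w)

# task loss plus the summed L2 penalty over all trainable variables
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
loss += apply_regularization(l2_regularizer(5e-4), tf.trainable_variables())

# decayed learning rate driven by a non-trainable step counter
step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(0.01, step, decay_steps=1000,
                                decay_rate=0.95, staircase=True)
train_op = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss, global_step=step)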
Example #16
0
    def train(self, batch_xs, batch_ys, iterations):
        '''Run the training loop.

        :param batch_xs: input batch fed through the self._batch_xs placeholder
        :param batch_ys: label batch, shape [batch_size, 1]
        :param iterations: iterations per epoch
        :return:
        '''
        assert type(iterations) is int and iterations > 0
        assert self.mode == 'train', 'current mode is %s, not training mode' % self.mode
        assert self.initialized, 'initialize_weights() or load_weights() must be called before call train()'
        logits = tf.squeeze(self.features, name='probability')
        prediction = tf.argmax(logits,
                               axis=-1,
                               output_type=tf.int32,
                               name='prediction')
        # Summaries log configurations
        batch_ys = tf.reshape(batch_ys, [self.config.BATCH_SIZE, -1])
        loss_t = self._init_loss(logits, batch_ys)
        if self.config.USE_REGULARIZER:
            reg = layers.l2_regularizer(self.config.REGULARIZE_SCALE)
            loss_t += layers.apply_regularization(reg,
                                                  tf.trainable_variables())
        mAP_t = tf.reduce_mean(
            tf.cast(tf.equal(prediction, batch_ys), dtype=tf.float32))
        mAP_log = tf.summary.scalar('mAP', mAP_t)
        loss_log = tf.summary.scalar('loss', loss_t)
        summary_t = tf.summary.merge([mAP_log, loss_log])
        # global_steps
        # global_step = tf.get_variable('global_step', dtype=tf.int32, trainable=False,
        #                               initializer=tf.constant_initializer)
        global_step = tf.Variable(self.global_step_init_value, trainable=False)
        # variable averages operation
        variable_averages = tf.train.ExponentialMovingAverage(
            decay=0.99, num_updates=global_step)
        variable_averages_op = variable_averages.apply(
            tf.trainable_variables())
        # Exponential decay learning rate and optimizer configurations
        learning_rate = tf.train.exponential_decay(self.config.LEARNING_RATE,
                                                   global_step,
                                                   decay_steps=100,
                                                   decay_rate=0.96,
                                                   staircase=True,
                                                   name='learning_rate')
        if self.config.OPTIMIZER == 'sgd':
            optim = tf.train.GradientDescentOptimizer(learning_rate)
        elif self.config.OPTIMIZER == 'adam':
            optim = tf.train.AdamOptimizer(learning_rate)
        else:
            print('[!]Unidentifiable optimizer: ' + self.config.OPTIMIZER)
            raise NotImplementedError
        train_step = optim.minimize(loss_t,
                                    global_step=global_step,
                                    name=self.config.OPTIMIZER)
        train_op = tf.group(train_step, variable_averages_op)
        # Init global step and learning rate
        self.sess.run(global_step.initializer)
        # Train
        time_begin = datetime.now()
        suffix = str(int(time()))
        writer = tf.summary.FileWriter(self.ckpt_dir,
                                       self.sess.graph,
                                       filename_suffix=suffix)
        logging.basicConfig(filename=os.path.join(
            self.ckpt_dir, 'train.output-{}.txt'.format(suffix)),
                            level=logging.DEBUG)
        loops = iterations * self.config.TRAINING_EPOCH
        for it in range(loops):
            try:
                _, loss_val, sum_str, step_val, = self.sess.run(
                    [
                        train_op,
                        loss_t,
                        summary_t,
                        global_step,
                    ],
                    feed_dict={self._batch_xs: batch_xs})  # main train step
                if it % self.config.LOG_INTERVAL == 0:  # log summaries
                    step_val -= 1  # global_step was already incremented by sess.run(train_op), so log against the previous step
                    writer.add_summary(sum_str, step_val)
                    time_elapse = datetime.now() - time_begin
                    time_remain = time_elapse / (it + 1) * (loops - it - 1)
                    msg = 'elapsed time:{} remaining time:{} step:{} loss:{}'. \
                        format(time_elapse, time_remain, step_val, loss_val)
                    logmsg = processBar(it, loops, msg, 50)
                    logging.info(logmsg)
                    # if loss_val < save_limit and FLAGS.auto_save:
                    #     save_limit = loss_val
                    #     save(self.sess,
                    #          os.path.join(self.ckpt_dir, 'tmp_loss{:.3f}'.format(loss_val) + FLAGS.model_name),
                    #          step_val)
            except tf.errors.InvalidArgumentError as e:
                print(
                    'An error of type tf.errors.InvalidArgumentError has been ignored...'
                )
                print(e.message)
                logging.error('tf.errors.InvalidArgumentError:\r\n' +
                              e.message)
                continue
            except tf.errors.OutOfRangeError:
                writer.add_summary(sum_str, step_val)
                msg = 'Epoch reach the end, final loss value is {}'.format(
                    loss_val)
                logmsg = processBar(it, loops, msg, 50)
                logging.info(logmsg)
                break
        time_elapse = datetime.now() - time_begin
        print('Training finish, elapsed time %s' % time_elapse)
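A short, hedged sketch of the moving-average arrangement used in this train() (toy variables, not the class above): the EMA update is applied to the trainable variables and grouped with the optimizer step so both run together.

import tensorflow as tf

w = tf.get_variable('w', shape=[4], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
global_step = tf.Variable(0, trainable=False)

lr = tf.train.exponential_decay(0.1, global_step,
                                decay_steps=100, decay_rate=0.96, staircase=True)
variable_averages = tf.train.ExponentialMovingAverage(decay=0.99,
                                                      num_updates=global_step)
ema_op = variable_averages.apply(tf.trainable_variables())
step_op = tf.train.GradientDescentOptimizer(lr).minimize(loss,
                                                         global_step=global_step)
# run the gradient step and the shadow-variable update as one op
train_op = tf.group(step_op, ema_op)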
Example #17
0
    def build_model(self):
        # add place holder
        self.contexts = tf.placeholder(shape=[None, None],
                                       dtype=tf.int32,
                                       name="context")
        self.context_legnths = tf.placeholder(shape=[None],
                                              dtype=tf.int32,
                                              name="c_length")
        self.questions = tf.placeholder(shape=[None, None],
                                        dtype=tf.int32,
                                        name="q")
        self.question_legnths = tf.placeholder(shape=[None],
                                               dtype=tf.int32,
                                               name="q_len")
        # [batch, num_sentences, num_words]
        self.sentences = tf.placeholder(shape=[None, None, None],
                                        dtype=tf.int32,
                                        name="sentences")
        # [num_sentences, num_words]
        self.sequence_lengths = tf.placeholder(shape=[None, None],
                                               dtype=tf.int32,
                                               name="seq_len")
        # [num_sentences]
        self.sentence_lengths = tf.placeholder(shape=[None],
                                               dtype=tf.int32,
                                               name="sent_len")
        self.sentence_idx = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name="sent_idx")
        self.answerable = tf.placeholder(shape=[None],
                                         dtype=tf.int32,
                                         name="answ")
        self.answer_span = tf.placeholder(shape=[None, 2],
                                          dtype=tf.int32,
                                          name="answer_span")
        self.dropout = tf.placeholder(dtype=tf.float32, name="dropout")

        self.avg_loss = tf.placeholder(dtype=tf.float32, name="avg_loss")
        self.avg_em = tf.placeholder(dtype=tf.float32, name="avg_em")
        self.avg_acc = tf.placeholder(dtype=tf.float32, name="avg_acc")
        loss_summary = tf.summary.scalar("loss", self.avg_loss)
        acc_summary = tf.summary.scalar("accuracy", self.avg_acc)
        em_summary = tf.summary.scalar("em", self.avg_em)
        self.merged = tf.summary.merge([loss_summary, acc_summary, em_summary])

        self.document_size, self.sentence_size, self.word_size = tf.unstack(
            tf.shape(self.sentences))
        # add embeddings
        zeros = tf.constant([[0.0] * self.config.embedding_size])
        unk_dummy = tf.get_variable(shape=[2, self.config.embedding_size],
                                    initializer=layers.xavier_initializer(),
                                    name="special_token")
        # load pre-trained GloVe
        embedding_matrix = tf.Variable(initial_value=self.config.embeddings,
                                       trainable=False,
                                       dtype=tf.float32,
                                       name="embedding")
        self.embedding_matrix = tf.concat([zeros, unk_dummy, embedding_matrix],
                                          axis=0)
        self.embedded_sentences = tf.nn.embedding_lookup(
            self.embedding_matrix, self.sentences)
        self.embedded_sentences = tf.layers.dropout(self.embedded_sentences,
                                                    self.dropout)
        self.embedded_context = tf.nn.embedding_lookup(self.embedding_matrix,
                                                       self.contexts)
        self.embedded_context = tf.layers.dropout(self.embedded_context,
                                                  self.dropout)
        self.embedded_questions = tf.nn.embedding_lookup(
            self.embedding_matrix, self.questions)
        self.embedded_questions = tf.layers.dropout(self.embedded_questions,
                                                    self.dropout)
        # conv block and self attention block
        with tf.variable_scope("Embedding_Encoder_Layer"):
            contexts = self.residual_block(self.embedded_context,
                                           self.context_legnths,
                                           num_blocks=1,
                                           num_conv_blocks=4,
                                           kernel_size=7,
                                           num_filters=128,
                                           scope="Embedding_Encoder",
                                           reuse=False)
            questions = self.residual_block(self.embedded_questions,
                                            self.question_legnths,
                                            num_blocks=1,
                                            num_conv_blocks=4,
                                            kernel_size=7,
                                            num_filters=128,
                                            scope="Embedding_Encoder",
                                            reuse=True)
            reshaped_sentences = tf.reshape(
                self.embedded_sentences,
                [-1, self.word_size, self.config.embedding_size])
            sentence_len = tf.reshape(self.sequence_lengths, [-1])
            encoded_sentence = self.residual_block(reshaped_sentences,
                                                   sentence_len,
                                                   num_blocks=1,
                                                   num_conv_blocks=1,
                                                   kernel_size=7,
                                                   num_filters=128,
                                                   scope="Embedding_Encoder",
                                                   reuse=True)

        with tf.variable_scope("hierarchical_attention") and tf.device(
                "/device:GPU:0"):
            # [b * s, w, d]
            cnn_inputs = tf.layers.dense(
                encoded_sentence,
                self.config.filter_size,
                kernel_regularizer=self.regularizer,
                kernel_initializer=layers.xavier_initializer(),
                activation=tf.nn.relu)
            sentence_cnn = self.conv_encoder(cnn_inputs,
                                             self.config.filter_size,
                                             scope="word_encoder",
                                             reuse=False)
            encoded_question = self.question_encoding(questions,
                                                      self.question_legnths)
            # [b, s, d]
            sentence_vectors = self.word_level_attention(
                encoded_question, sentence_cnn, self.document_size,
                self.sentence_size, self.word_size, self.sequence_lengths)
            sentence_cnn = self.conv_encoder(sentence_vectors,
                                             self.config.filter_size,
                                             scope="sentence_encoder",
                                             reuse=False)
            document_vector, sentence_score = self.sentence_level_attention(
                encoded_question, sentence_cnn, self.sentence_size,
                self.sentence_lengths)

            self.attention_loss, self.binary_loss = self.auxiliary_loss(
                sentence_score, document_vector, encoded_question)
        with tf.variable_scope("Context_Query_Attention_Layer") and tf.device(
                "/device:GPU:0"):
            A, B = self.co_attention(questions, contexts,
                                     self.question_legnths,
                                     self.context_legnths)
            attention_outputs = [contexts, A, contexts * A, contexts * B]
        with tf.variable_scope("Model_Encoder_Layer"):
            inputs = tf.concat(attention_outputs, axis=2)
            inputs = tf.layers.dense(
                inputs,
                self.config.attention_size,
                kernel_regularizer=self.regularizer,
                kernel_initializer=layers.variance_scaling_initializer(),
                activation=tf.nn.relu)
            memories = []
            for i in range(3):
                outputs = self.residual_block(inputs,
                                              self.context_legnths,
                                              num_blocks=7,
                                              num_conv_blocks=2,
                                              num_filters=128,
                                              kernel_size=5,
                                              scope="Model_Encoder",
                                              reuse=True if i > 0 else False)
                if i == 2:
                    outputs = tf.layers.dropout(outputs, self.dropout)
                memories.append(outputs)
                inputs = outputs

        with tf.variable_scope("Output_Layer") and tf.device("/device:GPU:0"):
            logits_inputs = tf.concat([memories[0], memories[1]], axis=2)
            start_logits = self.pointer_network(document_vector,
                                                logits_inputs,
                                                self.context_legnths,
                                                scope="start_logits")
            logits_inputs = tf.concat([memories[0], memories[2]], axis=2)
            end_logits = self.pointer_network(document_vector,
                                              logits_inputs,
                                              self.context_legnths,
                                              scope="end_logits")

            start_label, end_label = tf.split(self.answer_span, 2, axis=1)
            start_label = tf.squeeze(start_label, axis=-1)
            end_label = tf.squeeze(end_label, axis=-1)
            losses1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=start_logits, labels=start_label)
            losses2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=end_logits, labels=end_label)
            cross_entropy_loss = tf.reduce_mean(losses1 + losses2)
            self.loss = cross_entropy_loss \
                        + self.config.alpha * self.attention_loss \
                        + self.config.beta * self.binary_loss

        # for inference
        logits1 = tf.nn.softmax(start_logits)
        logits2 = tf.nn.softmax(end_logits)
        outer_product = tf.matmul(tf.expand_dims(logits1, axis=2),
                                  tf.expand_dims(logits2, axis=1))
        outer = tf.matrix_band_part(outer_product, 0, self.config.ans_limit)
        self.start = tf.argmax(tf.reduce_max(outer, axis=2),
                               axis=1,
                               output_type=tf.int32)
        self.end = tf.argmax(tf.reduce_max(outer, axis=1),
                             axis=1,
                             output_type=tf.int32)
        self.em = self.evaluate_em(self.start, self.end, self.answer_span,
                                   self.unans_prob)
        if self.config.l2_lambda > 0:
            vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = layers.apply_regularization(self.regularizer, vars)
            self.loss += l2_loss
        # Exponential moving average
        self.var_ema = tf.train.ExponentialMovingAverage(0.9999)
        ema_op = self.var_ema.apply(tf.trainable_variables())
        with tf.control_dependencies([ema_op]):
            self.loss = tf.identity(self.loss)

            self.assign_vars = []
            for var in tf.global_variables():
                v = self.var_ema.average(var)
                if v:
                    self.assign_vars.append(tf.assign(var, v))

        self.add_train_op()
        self.init_session()
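The ema_op/assign_vars idiom at the end is how the averaged (shadow) weights get swapped in for evaluation; a minimal hedged sketch of just that idiom, with a toy variable:

import tensorflow as tf

w = tf.get_variable('w', initializer=[1.0, 2.0])
ema = tf.train.ExponentialMovingAverage(0.9999)
ema_op = ema.apply([w])

# copy each variable's shadow (averaged) value back into the variable itself
assign_avg = [tf.assign(v, ema.average(v))
              for v in tf.trainable_variables()
              if ema.average(v) is not None]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(ema_op)       # update the shadow copies
    sess.run(assign_avg)   # evaluate with the averaged weights from here on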
Example #18
0
def build_graph(actor,
                critic,
                value,
                obs_dim,
                num_actions,
                batch_size,
                gamma,
                tau,
                actor_lr,
                critic_lr,
                value_lr,
                reg_factor,
                scope='sac',
                reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        # input placeholders
        obs_t_input = tf.placeholder(tf.float32, [None, obs_dim], name='obs_t')
        act_t_ph = tf.placeholder(tf.float32, [None, num_actions],
                                  name='action')
        rew_t_ph = tf.placeholder(tf.float32, [None], name='reward')
        obs_tp1_input = tf.placeholder(tf.float32, [None, obs_dim],
                                       name='obs_tp1')
        done_mask_ph = tf.placeholder(tf.float32, [None], name='done')

        # actor network
        policy_t, greedy_policy_t, log_pi_t, reg = actor(obs_t_input,
                                                         num_actions,
                                                         reg_factor=reg_factor,
                                                         scope='actor')
        actor_func_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            '{}/actor'.format(scope))

        # critic network
        q_t = critic(obs_t_input, act_t_ph, scope='critic')
        q_t_with_actor = critic(obs_t_input,
                                policy_t,
                                scope='critic',
                                reuse=True)
        critic_func_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             '{}/critic'.format(scope))

        # value network
        v_t = value(obs_t_input, scope='value')
        value_func_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            '{}/value'.format(scope))

        # target value network
        v_tp1 = value(obs_tp1_input, scope='target_value')
        target_func_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             '{}/target_value'.format(scope))

        with tf.variable_scope('value_loss'):
            target = q_t - log_pi_t
            value_loss = tf.reduce_mean(
                0.5 * tf.square(v_t - tf.stop_gradient(target)))

        with tf.variable_scope('critic_loss'):
            target = rew_t_ph + gamma * v_tp1 * (1.0 - done_mask_ph)
            critic_loss = tf.reduce_mean(
                0.5 * tf.square(q_t - tf.stop_gradient(target)))

        with tf.variable_scope('policy_loss'):
            target = q_t_with_actor - v_t
            actor_loss = 0.5 * tf.reduce_mean(
                log_pi_t * tf.stop_gradient(log_pi_t - target))
            reg_variables = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = layers.apply_regularization(reg, reg_variables)
            actor_loss = actor_loss + l2_loss

        # optimize operations
        critic_optimizer = tf.train.AdamOptimizer(critic_lr)
        critic_optimize_expr = critic_optimizer.minimize(
            critic_loss, var_list=critic_func_vars)
        actor_optimizer = tf.train.AdamOptimizer(actor_lr)
        actor_optimize_expr = actor_optimizer.minimize(
            actor_loss, var_list=actor_func_vars)
        value_optimizer = tf.train.AdamOptimizer(value_lr)
        value_optimize_expr = value_optimizer.minimize(
            value_loss, var_list=value_func_vars)

        # update critic target operations
        with tf.variable_scope('update_value_target'):
            update_target_expr = []
            sorted_vars = sorted(value_func_vars, key=lambda v: v.name)
            sorted_target_vars = sorted(target_func_vars, key=lambda v: v.name)
            # assign value variables to target value variables
            for var, var_target in zip(sorted_vars, sorted_target_vars):
                new_var = tau * var + (1 - tau) * var_target
                update_target_expr.append(var_target.assign(new_var))
            update_target_expr = tf.group(*update_target_expr)

        def act(obs):
            feed_dict = {obs_t_input: obs}
            return tf.get_default_session().run([policy_t, greedy_policy_t],
                                                feed_dict=feed_dict)

        def train_actor(obs, action):
            feed_dict = {obs_t_input: obs, act_t_ph: action}
            loss_val, _ = tf.get_default_session().run(
                [actor_loss, actor_optimize_expr], feed_dict=feed_dict)
            return loss_val

        def train_critic(obs_t, action, rew, obs_tp1, done):
            feed_dict = {
                obs_t_input: obs_t,
                act_t_ph: action,
                rew_t_ph: rew,
                obs_tp1_input: obs_tp1,
                done_mask_ph: done
            }
            loss_val, _ = tf.get_default_session().run(
                [critic_loss, critic_optimize_expr], feed_dict=feed_dict)
            return loss_val

        def train_value(obs_t, action):
            feed_dict = {obs_t_input: obs_t, act_t_ph: action}
            loss_val, _ = tf.get_default_session().run(
                [value_loss, value_optimize_expr], feed_dict=feed_dict)
            return loss_val

        def update_target():
            tf.get_default_session().run(update_target_expr)

        return act, train_actor, train_critic, train_value, update_target
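The update_value_target block is a soft (Polyak) target update; a self-contained sketch of that pattern with toy one-variable networks (names here are illustrative, not from the function above):

import tensorflow as tf

tau = 0.005
with tf.variable_scope('value'):
    w = tf.get_variable('w', initializer=tf.ones([3]))
with tf.variable_scope('target_value'):
    w_t = tf.get_variable('w', initializer=tf.zeros([3]))

value_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'value/')
target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'target_value/')

# move each target variable a small step (tau) toward its source variable
update_target = tf.group(*[
    t.assign(tau * s + (1.0 - tau) * t)
    for s, t in zip(sorted(value_vars, key=lambda v: v.name),
                    sorted(target_vars, key=lambda v: v.name))
])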
Example #19
0
def _rpc_pooling(net,
                 seq_lens,
                 channel,
                 target_len,
                 unit,
                 regularizer=None,
                 conservative=True,
                 is_training=True):
    def cond(net, seq_lens):
        return tf.greater(tf.shape(net)[1], target_len)

    def rpc(net, seq_lens):
        net = nn_ops.pool1d('max', net, pool_size=3, strides=2)
        seq_lens = tf.to_int32(tf.ceil(tf.to_float(seq_lens) / 2.0))
        shortcut = net
        if unit == 'biconv':
            unit_fn = nn_ops.biconv_unit
        elif unit == 'bottleneck':
            unit_fn = nn_ops.bottleneck_unit
        elif unit == 'gated':
            unit_fn = nn_ops.gated_unit
        elif unit == 'lstm':
            unit_fn = nn_ops.lstm_unit
        else:
            raise NotImplementedError(unit)
        net = unit_fn(net, seq_lens, channel, ['rpc'], is_training)
        net = shortcut + net
        return net, seq_lens

    def body(net, seq_lens):
        net_rpc, seq_lens_rpc = rpc(net, seq_lens)
        length_rpc = tf.shape(net_rpc)[1]
        net_trimmed = tf.slice(net, begin=[0, 0, 0], size=[-1, length_rpc, -1])
        net = tf.where(tf.greater(seq_lens, length_rpc), net_rpc, net_trimmed)
        seq_lens = tf.where(tf.greater(seq_lens, length_rpc), seq_lens_rpc,
                            seq_lens)
        return net, seq_lens

    with tf.variable_scope('rpc_pooling'):
        batch, length, channel_in = net.get_shape().as_list()
        if channel_in != channel:
            net = nn_ops.temporal_batch_norm(net,
                                             seq_lens,
                                             center=True,
                                             scale=True,
                                             activation_fn=tf.nn.relu,
                                             is_training=is_training)
            net = slim.conv2d(
                net,
                channel,
                3,
                1,
                padding='SAME',
                activation_fn=None,
                biases_initializer=None,
                weights_initializer=layers.variance_scaling_initializer(),
                weights_regularizer=regularizer)
        net, seq_lens = tf.while_loop(cond,
                                      body, [net, seq_lens],
                                      shape_invariants=[
                                          tf.TensorShape(
                                              [batch, None, channel]),
                                          seq_lens.get_shape()
                                      ])
        rpc_weights = [
            var for var in tf.get_collection('rpc') if 'weights' in var.name
        ]
        layers.apply_regularization(regularizer, rpc_weights)
        net = nn_ops.temporal_batch_norm(net,
                                         seq_lens,
                                         center=True,
                                         scale=True,
                                         activation_fn=tf.nn.relu,
                                         is_training=is_training)
        net = nn_ops.temporal_mean_pooling(net, seq_lens)
        return net
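The repeated pooling relies on tf.while_loop with a shape invariant that leaves the shrinking time axis unconstrained; a minimal hedged sketch of that mechanism (shapes and the pooling op are illustrative):

import tensorflow as tf

net = tf.placeholder(tf.float32, [None, None, 8])   # [batch, time, channels]
target_len = 4

def cond(x):
    return tf.greater(tf.shape(x)[1], target_len)

def body(x):
    # roughly halve the time dimension on every iteration
    return tf.layers.max_pooling1d(x, pool_size=2, strides=2, padding='same')

# the time axis changes inside the loop, so its invariant is left as None
pooled = tf.while_loop(cond, body, [net],
                       shape_invariants=[tf.TensorShape([None, None, 8])])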
Example #20
0
    def build_graph(self):
        self._construct_weights()

        saver, logits, KL = self.forward_pass()
        log_softmax_var = tf.nn.log_softmax(logits, axis=-1)

        # apply regularization to weights
        reg = l2_regularizer(self.lam)

        reg_var = apply_regularization(reg, self.weights_q + self.weights_p)

        if self.test_mean_z == 1:
            neg_ll_all = tf.cond(
                tf.logical_not(tf.equal(self.is_training_ph, 1)),
                lambda: tf.reduce_sum(log_softmax_var * self.input_ph, axis=-1),
                lambda: tf.reduce_sum(
                    log_softmax_var * tf.expand_dims(self.input_ph, axis=1),
                    axis=-1))
        else:
            neg_ll_all = tf.reduce_sum(log_softmax_var *
                                       tf.expand_dims(self.input_ph, axis=1),
                                       axis=-1)

        neg_ll_scalar = -tf.reduce_mean(neg_ll_all)
        KL_scalar = tf.reduce_mean(KL)
        neg_ELBO_scalar = neg_ll_scalar + self.anneal_ph * KL_scalar + 2 * reg_var

        if self.test_mean_z == 1:
            loss_iw = tf.cond(
                tf.logical_not(tf.equal(self.is_training_ph, 1)),
                lambda: neg_ELBO_scalar,
                lambda: tf.reduce_mean(
                    tf.reduce_logsumexp(-neg_ll_all + self.anneal_ph * KL, 1) +
                    tf.log(tf.cast(self.K, tf.float32))
                ))  # only this line needs changing to run without importance sampling

        else:
            loss_iw = tf.reduce_mean(
                tf.reduce_logsumexp(-neg_ll_all + self.anneal_ph * KL, 1) +
                tf.log(tf.cast(self.K, tf.float32)))
            log_softmax_var = tf.cond(
                tf.logical_not(tf.equal(self.is_training_ph, 1)),
                lambda: tf.reduce_logsumexp(log_softmax_var, axis=1),
                lambda: log_softmax_var)

        neg_ELBO = loss_iw + 2 * reg_var

        trainer1 = tf.train.AdamOptimizer(self.lr)
        gradvars_1 = trainer1.compute_gradients(neg_ELBO,
                                                var_list=[
                                                    self.weights_p,
                                                    self.biases_p,
                                                    self.weights_q,
                                                    self.biases_q
                                                ])
        train_op1 = trainer1.apply_gradients(gradvars_1)

        # add summary statistics
        tf.summary.scalar('negative_multi_ll', neg_ll_scalar)
        tf.summary.scalar('KL', KL_scalar)
        tf.summary.scalar('neg_ELBO_train', neg_ELBO_scalar)
        merged = tf.summary.merge_all()

        trainer2 = tf.train.AdamOptimizer(self.lr)
        gradvars_2 = self.forward_pass_ARM()
        train_op2 = trainer2.apply_gradients([(gradvars_2, self.keep_prob_ph)])

        with tf.control_dependencies([train_op1, train_op2]):
            train_op = tf.no_op()

        return saver, log_softmax_var, neg_ELBO, train_op, merged
Example #21
0
                                   -1.0 / math.sqrt(hiddenLayer3),
                                   1.0 / math.sqrt(hiddenLayer3)),
                 trainable=True)
pi1 = tf.Variable(tf.zeros([inputSize]), trainable=True)
pi2 = tf.Variable(tf.zeros([inputSize]), trainable=True)
pi3 = tf.Variable(tf.zeros([inputSize]), trainable=True)

E1 = tf.nn.sigmoid(tf.matmul(X, V1) + mu1)
E2 = tf.nn.sigmoid(tf.add(tf.matmul(E1, V2), mu2))
E3 = tf.nn.sigmoid(tf.add(tf.matmul(E2, V3), mu3))
YS1 = tf.multiply(tf.identity(tf.add(tf.matmul(E1, S1), pi1)), mapping)
YS2 = tf.multiply(tf.identity(tf.add(tf.matmul(E2, S2), pi2)), mapping)
YS3 = tf.multiply(tf.identity(tf.add(tf.matmul(E3, S3), pi3)), mapping)
Ypool = (YS1 + YS2 + YS3) / 3

regularize = layers.apply_regularization(layers.l2_regularizer(scale=lambdaR),
                                         weights_list=[V1, V2, V3, S1, S2, S3])

difference1NM = X - YS1
difference2NM = X - YS2
difference3NM = X - YS3
differencePool = X - Ypool

Loss1NM = tf.reduce_sum(tf.square(difference1NM))
Loss2NM = tf.reduce_sum(tf.square(difference2NM))
Loss3NM = tf.reduce_sum(tf.square(difference3NM))
LossPool = tf.reduce_sum(tf.square(differencePool))

loss = Loss1NM + Loss2NM + Loss3NM + LossPool + regularize

optimizer = layers.optimize_loss(loss=loss,
                                 global_step=tf.train.get_global_step(),
Example #22
0
def train(train_record_file, train_log_step, train_param, val_record_file,
          val_log_step, labels_nums, data_shape, snapshot, snapshot_prefix):
    '''
    :param train_record_file: tfrecord file used for training
    :param train_log_step: interval (in steps) for logging training progress
    :param train_param: train parameters
    :param val_record_file: tfrecord file used for validation
    :param val_log_step: interval (in steps) for logging validation progress
    :param val_param: val parameters
    :param labels_nums: number of labels
    :param data_shape: shape of the input data
    :param snapshot: interval (in steps) for saving the model
    :param snapshot_prefix: prefix of the saved model files
    :return:
    '''
    [base_lr, max_steps] = train_param
    [batch_size, resize_height, resize_width, depths] = data_shape

    # get the number of training and validation examples
    train_nums = get_example_nums(train_record_file)
    val_nums = get_example_nums(val_record_file)
    print('train nums:%d,val nums:%d' % (train_nums, val_nums))

    # read the images and labels from the tfrecord files
    # train data: training data should usually be shuffled (shuffle=True)
    train_images, train_labels = read_records(train_record_file,
                                              resize_height,
                                              resize_width,
                                              type='normalization')
    train_images_batch, train_labels_batch = get_batch_images(
        train_images,
        train_labels,
        batch_size=batch_size,
        labels_nums=labels_nums,
        one_hot=True,
        shuffle=False)
    # val data: validation data does not need to be shuffled
    val_images, val_labels = read_records(val_record_file,
                                          resize_height,
                                          resize_width,
                                          type='normalization')
    val_images_batch, val_labels_batch = get_batch_images(
        val_images,
        val_labels,
        batch_size=batch_size,
        labels_nums=labels_nums,
        one_hot=True,
        shuffle=False)

    reg = layers.l2_regularizer(scale=0.1)
    out = nets(inputs=input_images,
               num_classes=labels_nums,
               dropout_keep_prob=keep_prob,
               is_training=is_training,
               reg=reg)
    print("combine.shape={}".format(out.get_shape()))
    # tf.losses.softmax_cross_entropy(onehot_labels=input_labels, logits=out)  # add the cross-entropy loss, loss = 1.6

    # pred = tf.cast(tf.argmax(tf.nn.softmax(out), 1), tf.int32)
    weight = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    reg_ws = layers.apply_regularization(reg, weights_list=weight)

    loss1 = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=input_labels,
                                                logits=out))
    loss = loss1 + tf.reduce_sum(reg_ws)  # without the regularization term loss < 100, with it loss > 10000
    tf.summary.scalar("loss", loss)
    train_op = tf.train.AdamOptimizer(base_lr).minimize(loss)
    # accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, input_labels), tf.float32))
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)),
                tf.float32))
    tf.summary.scalar("accuracy", accuracy)
    merged = tf.summary.merge_all()

    train_writer = tf.summary.FileWriter('./log', tf.get_default_graph())

    saver = tf.train.Saver()
    max_acc = 0.0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for i in range(max_steps + 1):
            batch_input_images, batch_input_labels = sess.run(
                [train_images_batch, train_labels_batch])
            _, train_loss = sess.run(
                [train_op, loss],
                feed_dict={
                    input_images: batch_input_images,
                    input_labels: batch_input_labels,
                    keep_prob: 0.8,
                    is_training: True
                })
            # train evaluation (only one batch of the training set is evaluated here)
            if i % train_log_step == 0:
                train_acc, train_summary = sess.run(
                    [accuracy, merged],
                    feed_dict={
                        input_images: batch_input_images,
                        input_labels: batch_input_labels,
                        keep_prob: 1.0,
                        is_training: False
                    })
                train_writer.add_summary(train_summary, i)
                print(
                    "%s: Step [%d]  train Loss : %f, training accuracy :  %g" %
                    (datetime.now(), i, train_loss, train_acc))

            # val evaluation (all the val data is evaluated)
            if i % val_log_step == 0:
                mean_loss, mean_acc = net_evaluation(sess, loss, accuracy,
                                                     val_images_batch,
                                                     val_labels_batch,
                                                     val_nums)
                print("%s: Step [%d]  val Loss : %f, val accuracy :  %g" %
                      (datetime.now(), i, mean_loss, mean_acc))

            # model saving: save every snapshot iterations and at the last step
            if (i % snapshot == 0 and i > 0) or i == max_steps:
                print('-----save:{}-{}'.format(snapshot_prefix, i))
                saver.save(sess, snapshot_prefix, global_step=i)
            # save the model with the highest val accuracy
            if mean_acc > max_acc and mean_acc > 0.5:
                max_acc = mean_acc
                path = os.path.dirname(snapshot_prefix)
                best_models = os.path.join(
                    path, 'best_models_{}_{:.4f}.ckpt'.format(i, max_acc))
                print('------save:{}'.format(best_models))
                saver.save(sess, best_models)

        coord.request_stop()
        coord.join(threads)
    train_writer.close()
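The comment about the loss jumping past 10000 with scale=0.1 is easy to reproduce in isolation: apply_regularization just sums scale * l2_loss(w) over the listed weights, as in this small hedged sketch (the weight values are illustrative):

import tensorflow as tf
from tensorflow.contrib import layers

w = tf.Variable([[3.0, 4.0]])                 # sum(w ** 2) = 25
reg = layers.l2_regularizer(scale=0.1)
penalty = layers.apply_regularization(reg, weights_list=[w])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(penalty))                  # 0.1 * 25 / 2 = 1.25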
Example #23
0
    def __init__(self):
        
        # Let's start by defining the batch size;
        # it will be the same for the noise and the real data.
        self.batch_size = 196
        self.n_noise = 196
        
        # We need two placeholders: one for the real data and one for the noise.
        self.X_in = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='X')
        self.noise = tf.placeholder(dtype=tf.float32, shape=[None, self.n_noise])      
        
        # Here we call our generator to generate fake data;
        # we need to pass the dropout keep probability and the training flag.
        self.g = Generator(self.noise, keep_prob, is_training)
        
        # Here we create two discriminator models.
        # They share the same parameters (weights and biases);
        # one operates on the real data while the other operates on the fake data.
        self.d_real = Discriminator(self.X_in)
        self.d_fake = Discriminator(self.g, reuse=True)
        
        # Separate the trainable variables of the generator and the discriminator
        self.vars_g = [var for var in tf.trainable_variables() 
                       if var.name.startswith("generator")]
        self.vars_d = [var for var in tf.trainable_variables() 
                       if var.name.startswith("discriminator")]
        
        # Apply L2 regularization to the weights of the
        # generator and the discriminator
        self.d_reg = apply_regularization(l2_regularizer(1e-6),self.vars_d)
        self.g_reg = apply_regularization(l2_regularizer(1e-6), self.vars_g)
        
        # We use binary cross-entropy loss to measure the performance
        # of the discriminator on real and fake inputs
        self.loss_d_real = binary_cross_entropy(tf.ones_like(self.d_real), self.d_real)
        self.loss_d_fake = binary_cross_entropy(tf.zeros_like(self.d_fake), self.d_fake)
        
        #Here we will calculate the loss for both networks
        self.loss_g = tf.reduce_mean(binary_cross_entropy(tf.ones_like(self.d_fake), self.d_fake))
        self.loss_d = tf.reduce_mean(0.5 * (self.loss_d_real + self.loss_d_fake))
        
        # Collect the graph update ops (e.g. batch-norm moving averages)
        self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        
        # Now it's time to set up the training ops
        with tf.control_dependencies(self.update_ops):
            #Total loss of Discriminator with regularization            
            total_loss_d = self.loss_d + self.d_reg
            
            #Total loss of Generator with regularization
            total_loss_g = self.loss_g + self.g_reg
            
            #Set the learning rate
            lr = 0.00015

            # We use RMSProp to train both networks
            self.optimizer_d = tf.train.RMSPropOptimizer(learning_rate=
                                                         lr).minimize(total_loss_d,
                                                                      var_list=self.vars_d)
            self.optimizer_g = tf.train.RMSPropOptimizer(learning_rate=
                                                         lr).minimize(total_loss_g,
                                                                      var_list=self.vars_g)
Example #24
0
    def __init__(self, user_num, item_num, f, user_pos_length, user_neg_length, item_pos_length, item_neg_length,
                 user_pos_vocab_size, user_neg_vocab_size, item_pos_vocab_size, item_neg_vocab_size, embedding_size,
                 filter_sizes, num_filters, n_pos_aspect, n_neg_aspect):

        self.input_u_pos = tf.placeholder(tf.int32, [None, user_pos_length], name='input_u_pos')
        self.input_u_neg = tf.placeholder(tf.int32, [None, user_neg_length], name='input_u_neg')
        self.input_i_pos = tf.placeholder(tf.int32, [None, item_pos_length], name='input_i_pos')
        self.input_i_neg = tf.placeholder(tf.int32, [None, item_neg_length], name='input_i_neg')
        self.input_y = tf.placeholder(tf.float32, [None, 1], name="input_y")
        self.input_uid = tf.placeholder(tf.int32, [None, 1], name="input_uid")
        self.input_iid = tf.placeholder(tf.int32, [None, 1], name="input_iid")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        with tf.name_scope("user_pos_embedding"):
            self.Wu_pos = tf.Variable(tf.random_uniform([user_pos_vocab_size, embedding_size], -1.0, 1.0), trainable=False, name='Wu_pos')
            self.embedded_user_pos = tf.nn.embedding_lookup(self.Wu_pos, self.input_u_pos)
            self.embedded_users_pos = tf.expand_dims(self.embedded_user_pos, -1)

        with tf.name_scope("user_neg_embedding"):
            self.Wu_neg = tf.Variable(tf.random_uniform([user_neg_vocab_size, embedding_size], -1.0, 1.0), trainable=False, name='Wu_neg')
            self.embedded_user_neg = tf.nn.embedding_lookup(self.Wu_neg, self.input_u_neg)
            self.embedded_users_neg = tf.expand_dims(self.embedded_user_neg, -1)

        with tf.name_scope("item_pos_embedding"):
            self.Wi_pos = tf.Variable(tf.random_uniform([item_pos_vocab_size, embedding_size], -1.0, 1.0), trainable=False, name='Wi_pos')
            self.embedded_item_pos = tf.nn.embedding_lookup(self.Wi_pos, self.input_i_pos)
            self.embedded_items_pos = tf.expand_dims(self.embedded_item_pos, -1)

        with tf.name_scope("item_neg_embedding"):
            self.Wi_neg = tf.Variable(tf.random_uniform([item_neg_vocab_size, embedding_size], -1.0, 1.0), trainable=False, name='Wi_neg')
            self.embedded_item_neg = tf.nn.embedding_lookup(self.Wi_neg, self.input_i_neg)
            self.embedded_items_neg = tf.expand_dims(self.embedded_item_neg, -1)

        with tf.name_scope("user_latent_factors"):
            self.user_Matrix = tf.Variable(tf.random_uniform([user_num, f], -1.0, 1.0), name='user_Matrix')
            self.user_latent = tf.nn.embedding_lookup(self.user_Matrix, self.input_uid)
            self.user_latent = tf.reshape(self.user_latent, [-1, f])

        with tf.name_scope("item_latent_factors"):
            self.item_Matrix = tf.Variable(tf.random_uniform([item_num, f], -1.0, 1.0), name='item_Matrix')
            self.item_latent = tf.nn.embedding_lookup(self.item_Matrix, self.input_iid)
            self.item_latent = tf.reshape(self.item_latent, [-1, f])

        with tf.name_scope("pos_aspect_weight"):
            self.pos_W = tf.Variable(tf.random_uniform([n_pos_aspect, f], -1.0, 1.0), name='pos_W')

        with tf.name_scope("neg_aspect_weight"):
            self.neg_W = tf.Variable(tf.random_uniform([n_neg_aspect, f], -1.0, 1.0), name='neg_W')

        output_u_pos = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("user_pos_conv-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_users_pos,
                    W,
                    strides=[1, 1, embedding_size, 1],
                    padding="SAME",
                    name="conv")  # batch_size * user_pos_length * 1 * num_filters
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                h1 = tf.reshape(h, [-1, user_pos_length, num_filters])
                output_u_pos.append(h1)

        num_filters_total = num_filters * len(filter_sizes)
        self.output_u_pos_con = tf.concat(output_u_pos, 2)
        self.output_u_pos_res = tf.reshape(self.output_u_pos_con, [-1, num_filters_total])
        # Layer 1
        Wu_pos_1 = tf.get_variable("Wu_pos_1", shape=[num_filters_total, n_pos_aspect],
                                   initializer=tf.contrib.layers.xavier_initializer())
        bu_pos_1 = tf.Variable(tf.constant(0.1, shape = [n_pos_aspect]))
        self.u_pos_l1 = tf.nn.softmax(tf.nn.relu(tf.matmul(self.output_u_pos_res, Wu_pos_1) + bu_pos_1))


        self.pos_asp = tf.reduce_sum(tf.reshape(self.u_pos_l1, [-1, user_pos_length, n_pos_aspect]), axis=1)
        self.pos_asp_imp = tf.nn.softmax(self.pos_asp)  # batch_size * n_pos_aspect


        output_u_neg = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("user_neg_conv-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1,
                                num_filters]  # [filter_height, filter_width, in_channels, out_channels]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_users_neg,
                    W,
                    strides=[1, 1, embedding_size, 1],
                    padding="SAME",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                h1 = tf.reshape(h, [-1, user_neg_length, num_filters])
                output_u_neg.append(h1)

        self.output_u_neg_con = tf.concat(output_u_neg, 2)
        self.output_u_neg_res = tf.reshape(self.output_u_neg_con, [-1, num_filters_total])
        # Layer 1
        Wu_neg_1 = tf.get_variable("Wu_neg_1", shape=[num_filters_total, n_neg_aspect],
                                   initializer=tf.contrib.layers.xavier_initializer())
        bu_neg_1 = tf.Variable(tf.constant(0.1, shape=[n_neg_aspect]))
        self.u_neg_l1 = tf.nn.softmax(tf.nn.relu(tf.matmul(self.output_u_neg_res, Wu_neg_1) + bu_neg_1))


        self.neg_asp = tf.reduce_sum(tf.reshape(self.u_neg_l1, [-1, user_neg_length, n_neg_aspect]), axis=1)
        self.neg_asp_imp = tf.nn.softmax(self.neg_asp)  # batch_size * n_neg_aspect



        neg_asp_imp_add = []
        with tf.name_scope("pos2neg_imp"):
            W = tf.Variable(tf.truncated_normal(shape=[f, f], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[f]), name='b')
            h = tf.Variable(tf.truncated_normal(shape=[f, 1], stddev=0.1), name="h")
            for i in range(n_neg_aspect):
                neg_Wi = self.neg_W[i]
                mul = tf.multiply(self.pos_W, neg_Wi)
                rel = tf.nn.relu(tf.matmul(mul, W) + b)
                attn = tf.nn.softmax(tf.matmul(rel, h), dim=0)  # n_pos_aspect * 1
                neg_asp_imp_i = tf.matmul(self.pos_asp_imp, attn)  # batch_size * 1
                neg_asp_imp_add.append(neg_asp_imp_i)

        pos_asp_imp_add = []
        with tf.name_scope("neg2pos_imp"):
            W = tf.Variable(tf.truncated_normal(shape=[f, f], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[f]), name='b')
            h = tf.Variable(tf.truncated_normal(shape=[f, 1], stddev=0.1), name="h")
            for i in range(n_pos_aspect):
                pos_Wi = self.pos_W[i]
                mul = tf.multiply(self.neg_W, pos_Wi)
                rel = tf.nn.relu(tf.matmul(mul, W) + b)
                attn = tf.nn.softmax(tf.matmul(rel, h), dim=0)
                pos_asp_imp_i = tf.matmul(self.neg_asp_imp, attn)
                pos_asp_imp_add.append(pos_asp_imp_i)

        with tf.name_scope("prediction"):
            # print(self.user_latent.shape())
            self.interaction = tf.multiply(self.user_latent, self.item_latent)
            self.pos_asp_r = tf.matmul(self.interaction, tf.transpose(self.pos_W))  # batch_size * n_pos_asp
            self.pos_imp = self.pos_asp_imp + tf.concat(pos_asp_imp_add, -1)
            self.pos_r = tf.reduce_sum(tf.multiply(self.pos_asp_r, self.pos_imp), axis=-1)

            self.neg_asp_r = tf.matmul(self.interaction, tf.transpose(self.neg_W))
            self.neg_imp = self.neg_asp_imp + tf.concat(neg_asp_imp_add, -1)
            self.neg_r = tf.reduce_sum(tf.multiply(self.neg_asp_r, self.neg_imp), axis=-1)

            # reshape to [batch_size, 1] so the predictions match input_y in the loss below
            self.predictions = tf.reshape(self.pos_r - self.neg_r, [-1, 1])

        regularizer = layers.l2_regularizer(scale=1.0)
        Var_list_1 = [Wu_pos_1, bu_pos_1, Wu_neg_1, bu_neg_1]

        for i, filter_size in enumerate(filter_sizes):
            Var_list_1 += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="user_pos_conv-%s" % filter_size)
            Var_list_1 += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="user_neg_conv-%s" % filter_size)

        reg_1 = layers.apply_regularization(regularizer, weights_list=Var_list_1)

        Var_list_2 = []

        Var_list_3 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="pos2neg_imp") \
                     + tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="neg2pos_imp")

        reg_3 = layers.apply_regularization(regularizer, weights_list=Var_list_3)

        self.variables = Var_list_1 + Var_list_2 + Var_list_3

        reg_4 = layers.apply_regularization(regularizer, weights_list=[self.user_Matrix, self.item_Matrix, self.pos_W, self.neg_W])


        with tf.name_scope("loss"):
            beta_1 = 1e-4
            beta_2 = 0.001
            losses = tf.reduce_mean(tf.square(tf.subtract(self.predictions, self.input_y)))
            self.loss = losses + beta_2 * (reg_1 + reg_3 + reg_4)


        with tf.name_scope("accuracy"):
            self.mae = tf.reduce_mean(tf.abs(tf.subtract(self.predictions, self.input_y)))
            self.accuracy = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.predictions, self.input_y))))
Example #25
0
import tensorflow as tf
from tensorflow.contrib import layers

data_dir = '/home/pzp/PycharmProjects/pzp_vgg16_project/data/train/*.JPEG'
file_list = tf.train.match_filenames_once(data_dir)

# apply_regularization needs a regularizer and a non-empty weights list;
# the weight variable and the 1e-4 scale here are only for demonstration
weights = tf.Variable(tf.truncated_normal([3, 3], stddev=0.1), name='weights')
reg_loss = layers.apply_regularization(layers.l2_regularizer(1e-4), [weights])

sess = tf.Session()
# match_filenames_once creates a local variable, so initialize locals as well
sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
dir_list = sess.run(file_list)
reg_value = sess.run(reg_loss)
Example #26
0
    def test(self, modelpath):
        with self.graph.as_default():
            c3d_net = [
                ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
                ["maxpool", "pool1", [1, 1, 2, 2, 1]],
                ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
                ["maxpool", "pool2", [1, 2, 2, 2, 1]],
                ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
                ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
                ["maxpool", "pool3", [1, 2, 2, 2, 1]],
                ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
                ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
                ["maxpool", "pool4", [1, 2, 2, 2, 1]],
                ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
                ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
                ["maxpool", "pool5", [1, 2, 2, 2, 1]],
                [
                    "transpose", [0, 1, 4, 2, 3]
                ],  # only use this transpose if you restore sports1m_finetuning_ucf101.model; otherwise comment it out (e.g. when using conv3d_deepnetA_sport1m_iter_1900000_TF.model)
                ["reshape", [-1, 8192]],
                ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
                ["dropout", "dropout1", self.keep_prob],
                ["fc", "fc2", [4096, 4096], 'wd2', 'bd2', True],
                ["dropout", "dropout2", self.keep_prob],
                ["fc", "fc3", [4096, self.num_class], 'wout', 'bout', False],
            ]

            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.gpu_options.per_process_gpu_memory_fraction = 0.9

            with tf.Session(config=config, graph=self.graph) as sess:
                logits = self.parseNet(self.inputs, c3d_net)
                softmax_logits = tf.nn.softmax(logits)

                int_label = self.labels

                task_loss = tf.reduce_sum(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits, labels=int_label))

                acc = tf.reduce_mean(
                    tf.cast(
                        tf.equal(tf.argmax(softmax_logits, axis=-1),
                                 int_label), tf.float32))
                right_count = tf.reduce_sum(
                    tf.cast(
                        tf.equal(tf.argmax(softmax_logits, axis=1), int_label),
                        tf.int32))
                ensemble_logist = softmax_logits
                reg_loss = layers.apply_regularization(
                    layers.l2_regularizer(5e-4),
                    tf.get_collection(tf.GraphKeys.WEIGHTS))
                total_loss = task_loss + reg_loss

                train_op = tf.train.GradientDescentOptimizer(self.lr).minimize(
                    total_loss, global_step=self.global_step)

                total_para = np.sum([
                    np.prod(v.get_shape().as_list())
                    for v in tf.trainable_variables()
                ])
                print('total_para:', total_para)

                init = tf.global_variables_initializer()

                sess.run(init)
                saver = tf.train.Saver(tf.trainable_variables())

                # ========================================================================================
                # Recoded after losing all the code - awful day 21/5/2018

                # test_list=["./test1.list",'./test1.list',"./test1.list"]
                # test_list=["./kth_rgb_test.list",'./kth_fsaf_test2.list',"./kth_of_test2.list"]
                # network_models = ['c3d_kth_rgb','c3d_kth_fsaf','c3d_kth_of']

                test_list = [
                    "./hmdb51_rgb_test.list", "./hmdb51_fsaf_test.list",
                    './hmdb51_of_test2.list'
                ]
                network_models = [
                    'c3d_hmdb51_rgb', 'c3d_hmdb51_fsaf', 'c3d_hmdb51_of'
                ]

                # test_list=["./ucf101_rgb_test.list","./ucf101_saf_test2.list",'./ucf101_of_test2.list']
                # network_models = ['c3d_ucf_rgb','c3d_ucf_saf','c3d_ucf_of']

                # lines = open(test_list[0],'r')
                # # lines = list(lines)
                # lines = list(line for line in lines if line)
                # number_of_line = len(lines)
                # self.test_size = number_of_line
                list_accuracy = []
                pred_labels = []
                true_labels = []
                num_networks = len(network_models)
                # ======================================================================================
                for m in range(num_networks):
                    softmax_one_networks = []
                    saver.restore(sess, modelpath + network_models[m])
                    print("Model {:2d} loading finished!".format(m))
                    step = 0
                    print_freq = 2
                    next_start_pos = 0
                    lines = open(test_list[m], 'r')
                    # lines = list(lines)
                    lines = list(line for line in lines if line)
                    number_of_line = len(lines)
                    self.test_size = number_of_line
                    # print(number_of_line)
                    for one_epoch in range(1):
                        epostarttime = time.time()
                        starttime = time.time()
                        total_v = 0.0
                        test_correct_num = 0

                        for i in tqdm(
                                range(int(self.test_size / self.batch_size))):
                            step += 1
                            total_v += self.batch_size

                            train_batch, label_batch, next_start_pos, _, _ = read_clip_and_label(
                                filename=test_list[m],
                                batch_size=self.batch_size,
                                start_pos=next_start_pos,
                                num_frames_per_clip=self.CLIP_LENGTH,
                                height=self.IMG_HEIGHT,
                                width=self.IMG_WIDTH,
                                shuffle=False)

                            assert len(train_batch) == self.batch_size
                            train_batch = train_aug(
                                train_batch,
                                is_train=False,
                                Crop_heith=self.CROP_HEIGHT,
                                Crop_width=self.CROP_WIDTH,
                                norm=True)
                            val_feed = {
                                self.inputs: train_batch,
                                self.labels: label_batch
                            }
                            test_correct_num += sess.run(right_count, val_feed)

                            #add 22/5
                            softmax = sess.run(ensemble_logits, val_feed)
                            if m == 0:  # collect ground-truth labels from the first network only
                                true_labels.extend(label_batch)

                            softmax_one_networks.extend(softmax)

                            print('test acc:', test_correct_num / total_v,
                                  'test_correct_num:', test_correct_num,
                                  'total_v:', total_v)
                    list_accuracy.append(test_correct_num / total_v)
                    pred_labels.append(softmax_one_networks)

                print(list_accuracy)
                print(np.shape(true_labels), np.shape(pred_labels))
                # pred_labels shape = (num_networks, num_label,num_class)
                # print(true_labels)

                #ensemble:
                number_of_test = len(true_labels)
                if self.ensemble_type == 1:  #average fusion
                    ensemble_pred_labels = np.mean(pred_labels, axis=0)
                    ensemble_cls_pred = np.argmax(ensemble_pred_labels, axis=1)

                elif self.ensemble_type == 2:  # max fusion
                    ensemble_pred_labels = np.amax(pred_labels, axis=0)
                    ensemble_cls_pred = np.argmax(ensemble_pred_labels, axis=1)
                else:  #vote fusion
                    #Compare networks

                    vote_softmax = np.zeros(number_of_test, dtype=int)
                    print(number_of_test, np.shape(pred_labels))
                    for i in range(number_of_test):
                        argmax_networks = []
                        for m in range(num_networks):
                            argmax_networks.append(
                                np.argmax(pred_labels[m][i], axis=0))
                        # compare each network to choose
                        counter = Counter(argmax_networks)
                        best_net = [(k, v) for k, v in counter.items()
                                    if v == max(counter.values())]
                        if len(best_net) > 1:  # several networks tie on the predicted label
                            vote_softmax[i] = np.argmax(np.amax(pred_labels,
                                                                axis=0),
                                                        axis=1)[i]
                            # print(best_net,i,vote_softmax[i],true_labels[i])
                        else:
                            vote_softmax[i] = best_net[0][0]
                    ensemble_cls_pred = vote_softmax

                ensemble_correct = (ensemble_cls_pred == true_labels)
                print('ensemble accuracy:',
                      np.sum(ensemble_correct) / number_of_test)
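The three fusion branches above reduce to a few NumPy operations. A minimal, self-contained sketch of the same logic (the function and variable names here are illustrative, not taken from the original code):

import numpy as np
from collections import Counter

def fuse_predictions(pred_labels, ensemble_type=1):
    """pred_labels: array of shape (num_networks, num_samples, num_classes) holding per-network softmax scores."""
    pred_labels = np.asarray(pred_labels)
    if ensemble_type == 1:  # average fusion: mean softmax over networks
        return np.argmax(pred_labels.mean(axis=0), axis=1)
    if ensemble_type == 2:  # max fusion: element-wise max over networks
        return np.argmax(pred_labels.max(axis=0), axis=1)
    # vote fusion: majority vote over per-network argmax, max fusion as tie-breaker
    per_net_cls = np.argmax(pred_labels, axis=2)           # (num_networks, num_samples)
    fallback = np.argmax(pred_labels.max(axis=0), axis=1)  # used when the vote is tied
    votes = np.empty(per_net_cls.shape[1], dtype=int)
    for i in range(per_net_cls.shape[1]):
        top = Counter(per_net_cls[:, i]).most_common()
        if len(top) > 1 and top[0][1] == top[1][1]:        # tie between networks
            votes[i] = fallback[i]
        else:
            votes[i] = top[0][0]
    return votes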
Example #27
0
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])
    #print embeddings_s1.shape
    #print tf.boolean_mask(embeddings_s1, mask_placeholder_s1, axis=1).shape
    #embeddings = tf.concat([tf.reduce_mean(tf.boolean_mask(embeddings_s1, mask_placeholder_s1), axis=1), tf.reduce_mean(tf.boolean_mask(embeddings_s2, mask_placeholder_s2), axis=1)], 0)
    #print embeddings.shape

    dropout_rate = dropout_placeholder

    preds = []
    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    c = tf.zeros([tf.shape(embeddings_s1)[0], hidden_size])
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(cell1,
                                         embeddings_s1,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l1)
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(cell2,
                                         embeddings_s2,
                                         dtype=tf.float32,
                                         initial_state=initial_state,
                                         sequence_length=l2)

    func = xavier_weight_init()

    # Implementation of attention on the final hidden layer
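    # In equation form, the block below computes (batch dimension left implicit):
    #     M     = tanh(W_y Y + W_h (h_N e_L))     shape (hidden_size, max_length)
    #     alpha = softmax(w_alpha M)              attention weights over sentence-1 time steps
    #     r     = Y alpha                         attended representation, shape (hidden_size,)
    # where h_N is the final hidden state of the second LSTM and e_L is a row of
    # ones used to broadcast h_N across all time steps.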
    Y = tf.transpose(outputs1, perm=[0, 2, 1])
    W_y = tf.Variable(func([hidden_size, hidden_size]))
    W_h = tf.Variable(func([hidden_size, hidden_size]))
    e_l = tf.constant(1.0, shape=[1, max_length])
    WY = tf.tensordot(W_y, Y, axes=[[0], [1]])
    WY = tf.transpose(WY, perm=[1, 0, 2])
    h_n = tf.reshape(state2.h, shape=[-1, hidden_size, 1])
    Whe = tf.tensordot(h_n, e_l, axes=[[2], [0]])
    Whe = tf.tensordot(W_h, Whe, axes=[[0], [1]])
    Whe = tf.transpose(Whe, perm=[1, 0, 2])
    M = tf.tanh(WY + Whe)
    w_alpha = tf.Variable(func([1, hidden_size]))
    alpha = tf.nn.softmax(tf.tensordot(w_alpha, M, axes=[[1], [1]]))
    alpha = tf.transpose(alpha, perm=[1, 2, 0])
    alpha = tf.reshape(alpha, shape=[-1, max_length, 1])
    #alpha_entries = tf.unstack(alpha, axis = 0, num=[tf.shape(embeddings_s1)[0]])
    #Y_entries = tf.unstack(Y, axis=0, num=[tf.shape(embeddings_s1)[0]])
    #r = tf.stack([tf.matmul(Y_entries[i], alpha_entries[i]) for i in len(alpha.shape[0])], axis=0)

    #print Y.shape, alpha.shape
    #r = tf.tensordot(Y, alpha, axes=[[2], [1]])
    #r = tf.reduce_mean(r, axis=2)
    #r = r[:, :, 0, :]
    #r = tf.diag_part(r)
    r = tf.matmul(Y, alpha)
    r = tf.reshape(r, shape=[-1, hidden_size])
    #r = Y * alpha
    #print r.shape
    #r = tf.matmul(Y, tf.transpose(alpha, perm=[0, 2, 1]))

    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    W_p = tf.Variable(func([hidden_size, hidden_size]))
    W_x = tf.Variable(func([hidden_size, hidden_size]))
    #print r.shape, state2.h.shape
    hstar = tf.tanh(tf.matmul(r, W_p) + tf.matmul(state2.h, W_x))
    #hstar = tf.tanh(tf.matmul(state2.h, W_x))
    h_drop = tf.nn.dropout(hstar, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1
    #pred = tf.add(tf.matmul(h_drop, U), b1, name="pred")

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss
    #y = labels_placeholder
    #loss = tf.nn.l2_loss(y-preds)
    #loss = tf.reduce_mean(loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    #train_op = optimizer.minimize(loss)

    #optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    gradients = optimizer.compute_gradients(loss)
    grads = [x[0] for x in gradients]
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    gradients = [(grads[i], gradients[i][1]) for i in range(len(grads))]
    train_op = optimizer.apply_gradients(gradients)
    return pred, loss, train_op
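buildGraph ends with the standard TF1 clip-then-apply pattern for gradients. A minimal standalone sketch of the same idiom (optimizer, loss and max_grad_norm stand in for whatever the caller defines):

import tensorflow as tf

def clipped_train_op(optimizer, loss, max_grad_norm):
    # Compute per-variable gradients, jointly rescale them so their global
    # norm does not exceed max_grad_norm, then apply the clipped gradients.
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    clipped_grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    return optimizer.apply_gradients(zip(clipped_grads, variables))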
Example #28
0
    def build_model(self):
        self._build_placeholder()

        data = None
        # Load word vocab and char vocab if we are using pretrained embedding
        if self.parameter['use_word_pretrained'] or self.parameter[
                'use_char_pretrained']:
            with open('necessary.pkl', 'rb') as f:
                data = pickle.load(f)

        self._build_word_and_char_embedding(data)

        # Look up each embedding value
        self._embeddings = []
        self._embeddings.append(
            tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
        self._embeddings.append(
            tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

        # Compute the character-level (syllable) embedding.
        character_embedding = tf.reshape(self._embeddings[1], [
            -1, self.parameter["word_length"],
            self.parameter["embedding"][1][2]
        ])
        char_len = tf.reshape(self.character_len, [-1])

        # Dropout after embedding, before lstm layer
        if self.parameter["use_dropout_after_embedding"]:
            character_embedding = tf.nn.dropout(character_embedding,
                                                self.emb_dropout_keep_prob)

        character_emb_rnn = self._build_birnn_model(
            character_embedding,
            char_len,
            self.parameter["char_lstm_units"],
            self.lstm_dropout_keep_prob,
            last=True,
            scope="char_layer")

        # Concatenate all of the embeddings computed above.
        all_data_emb = self.ne_dict
        for i in range(0, len(self._embeddings) - 1):
            all_data_emb = tf.concat([all_data_emb, self._embeddings[i]],
                                     axis=2)
        all_data_emb = tf.concat([all_data_emb, character_emb_rnn], axis=2)

        if self.parameter["use_highway"]:
            all_data_emb = self._build_highway(all_data_emb,
                                               self.parameter["num_layers"],
                                               scope="highway")
        # Dropout after embedding, before lstm layer
        if self.parameter["use_dropout_after_embedding"]:
            all_data_emb = tf.nn.dropout(all_data_emb,
                                         self.emb_dropout_keep_prob)

        # Run a Bi-RNN over the combined features
        sentence_output = self._build_birnn_model(all_data_emb,
                                                  self.sequence,
                                                  self.parameter["lstm_units"],
                                                  self.lstm_dropout_keep_prob,
                                                  scope="all_data_layer")
        if self.parameter["use_self_attention"]:
            aligned_output = self._attention(sentence_output,
                                             self.parameter["lstm_units"],
                                             self.parameter["num_heads"],
                                             self.sequence,
                                             scope="attention")
            outputs = tf.concat([sentence_output, aligned_output], axis=2)
        else:
            outputs = sentence_output

        outputs = tf.nn.dropout(outputs, self.dropout_rate)
        # [b, t, 3*d] -> [b, t, C]
        logits = self._build_dense_layer(outputs)

        # crf layer
        crf_cost = self._build_crf_layer(logits)
        if self.parameter["use_reg_loss"]:
            reg_vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            reg_term = layers.apply_regularization(self.regularizer, reg_vars)
            self.cost = crf_cost + reg_term
        else:
            self.cost = crf_cost

        self.train_op = self._build_output_layer(self.cost)

        # Exponential moving average
        if self.parameter["use_ema"]:
            var_ema = tf.train.ExponentialMovingAverage(
                decay=self.parameter["ema_decay_rate"])
            ema_op = var_ema.apply(tf.trainable_variables())
            with tf.control_dependencies([ema_op]):
                self.cost = tf.identity(self.cost)
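For the use_reg_loss branch above, note that in TF1 any variable created with a regularizer (for example via tf.get_variable's regularizer argument or a layer's weights_regularizer) adds its penalty to the REGULARIZATION_LOSSES collection. A minimal sketch of that pattern, with illustrative names:

import tensorflow as tf
from tensorflow.contrib import layers

regularizer = layers.l2_regularizer(scale=1e-4)

# Creating the variable with a regularizer registers its penalty term in
# tf.GraphKeys.REGULARIZATION_LOSSES automatically.
w = tf.get_variable("w", shape=[128, 10], regularizer=regularizer)

# The collected penalty terms can then be summed directly ...
reg_term = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
# ... or through the equivalent helper.
reg_term_alt = tf.losses.get_regularization_loss()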
Example #29
0
    def setup_model(self, input):
        '''
        Args:
          input: a dictionary containing 'z', 'im_gt', and 'sample_z'
        '''
        F = self.F
        #########################
        # (1) Define main model #
        #########################
        g_out = self.G(input['z'])
        d_out_real, d_logit_real = self.D(input['im_gt'])
        d_out_fake, d_logit_fake = self.D(g_out, reuse=True)
        self.output = g_out

        self.sample_img = self.G(input['sample_z'], reuse=True)

        ###################
        # (2) Define loss #
        ###################
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_out_real), logits=d_logit_real))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(d_out_fake), logits=d_logit_fake))
        g_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(d_out_fake), logits=d_logit_fake))

        d_reg_loss = tcl.apply_regularization(self.regularizer,
                                              weights_list=self.D.vars_train)
        g_reg_loss = tcl.apply_regularization(self.regularizer,
                                              weights_list=self.G.vars_train)

        self.d_loss = d_loss_real + d_loss_fake + d_reg_loss
        self.g_loss = g_loss_fake + g_reg_loss

        ########################
        # (3) Define optimizer #
        ########################
        global_step = tf.train.get_global_step()
        d_optimizer = tf.train.AdamOptimizer(learning_rate=F.learning_rate,
                                             beta1=F.beta1,
                                             beta2=F.beta2)
        g_optimizer = tf.train.AdamOptimizer(learning_rate=F.learning_rate,
                                             beta1=F.beta1,
                                             beta2=F.beta2)
        self.d_train_op = tcl.optimize_loss(loss=self.d_loss,
                                            optimizer=d_optimizer,
                                            learning_rate=F.learning_rate,
                                            variables=self.D.vars_train,
                                            global_step=global_step,
                                            name='d_optim')
        self.g_train_op = tcl.optimize_loss(loss=self.g_loss,
                                            optimizer=g_optimizer,
                                            learning_rate=F.learning_rate,
                                            variables=self.G.vars_train,
                                            global_step=global_step,
                                            name='g_optim')

        ######################
        # (4) Define summary #
        ######################
        # scalar summary
        tf.summary.scalar('d_loss_real', d_loss_real)
        tf.summary.scalar('d_loss_fake', d_loss_fake)
        tf.summary.scalar('g_loss_fake', g_loss_fake)
        # histogram summary
        tf.summary.histogram('z', input['z'])
        tf.summary.histogram('d_out_real', d_out_real)
        tf.summary.histogram('d_out_fake', d_out_fake)
        # image summary
        tf.summary.image('generated', g_out, max_outputs=3)
        tf.summary.image('real', input['im_gt'], max_outputs=3)
        # merge all summary operations to a single operation
        self.summary_all = tf.summary.merge_all()

        return self.output
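setup_model above passes explicit per-network weight lists (self.D.vars_train and self.G.vars_train) to tcl.apply_regularization rather than a graph collection. A minimal sketch of that call pattern, with illustrative variables:

import tensorflow as tf
import tensorflow.contrib.layers as tcl

regularizer = tcl.l2_regularizer(scale=1e-4)

w1 = tf.get_variable("d_w1", shape=[64, 64])
w2 = tf.get_variable("d_w2", shape=[64, 10])

# apply_regularization evaluates the regularizer on every tensor in
# weights_list and returns the summed penalty as a scalar tensor.
d_reg_loss = tcl.apply_regularization(regularizer, weights_list=[w1, w2])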
    def test(self, test_list, modelpath):
        with self.graph.as_default():
            c3d_net = [
                ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
                ["maxpool", "pool1", [1, 1, 2, 2, 1]],
                ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
                ["maxpool", "pool2", [1, 2, 2, 2, 1]],
                ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
                ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
                ["maxpool", "pool3", [1, 2, 2, 2, 1]],
                ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
                ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
                ["maxpool", "pool4", [1, 2, 2, 2, 1]],
                ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
                ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
                ["maxpool", "pool5", [1, 2, 2, 2, 1]],
                ["transpose", [0, 1, 4, 2, 3]],  #only use it if you restore the sports1m_finetuning_ucf101.model, otherwise uncomment it,(e.g use conv3d_deepnetA_sport1m_iter_1900000_TF.model)
                ["reshape", [-1, 8192]],
                ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
                ["dropout", "dropout1", self.keep_prob],
                ["fc", "fc2", [4096, 4096],'wd2','bd2', True],
                ["dropout", "dropout2", self.keep_prob],
                ["fc", "fc3", [4096, self.num_class],'wout','bout',False],
            ]

            # print(tf.trainable_variables())
            # print(var_list)
            # print(tf.get_collection(tf.GraphKeys.WEIGHTS))

            # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.5)
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.gpu_options.per_process_gpu_memory_fraction = 0.9

            with tf.Session(config=config, graph=self.graph) as sess:
                logits = self.parseNet(self.inputs, c3d_net)
                softmax_logits = tf.nn.softmax(logits)
                # int_label = tf.one_hot(self.labels, self.num_class)
                int_label = self.labels  # [bs,101]-->[bs*4 or 8 or 16,101]
                # int_label=tf.concat(
                #     [int_label,int_label,int_label,int_label,],axis=0)

                # int_label=tf.cast(int_label,dtype=tf.int64)
                task_loss = tf.reduce_sum(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=int_label))
                # task_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = int_label))
                # task_loss = -tf.reduce_sum(int_label*tf.log(logits))
                acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(softmax_logits, axis=-1), int_label), tf.float32))
                right_count = tf.reduce_sum(tf.cast(tf.equal(tf.argmax(softmax_logits, axis=1), int_label), tf.int32))
    
                reg_loss = layers.apply_regularization(layers.l2_regularizer(5e-4),
                                                       tf.get_collection(tf.GraphKeys.WEIGHTS))
                total_loss = task_loss + reg_loss
                # train_var_list = [v for v in tf.trainable_variables() if v.name.find("conv") == -1]
                train_op = tf.train.GradientDescentOptimizer(self.lr).minimize(
                    total_loss, global_step=self.global_step)
                # train_op = tf.train.MomentumOptimizer(self.lr,0.9).minimize(
                #     total_loss, global_step = self.global_step,var_list=train_var_list)
    
    
                total_para = np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
                print('total_para:', total_para)  # all CDC9 :28613120  #pool5 27655936

                # train clip:762960
                # test  clip:302640
                init = tf.global_variables_initializer()
                # var_list = [v for v in tf.trainable_variables() if v.name.find("conv") != -1]  # load only the convolutional-layer parameters at initialization
                # print(var_list)
                # saver = tf.train.Saver(tf.global_variables())
                sess.run(init)
                saver = tf.train.Saver(tf.trainable_variables())
                # saver.restore(sess, tf.train.latest_checkpoint(modelpath))
                saver.restore(sess, modelpath + "sports1m_finetuning_ucf101.model")
                print("Model Loading Done!")
                step = 0
                print_freq = 2
                next_start_pos = 0
                for one_epoch in range(1):
                    epostarttime = time.time()
                    starttime = time.time()
                    total_v = 0.0
                    test_correct_num = 0
                    for i in tqdm(range(int(3783 / self.batch_size))):
                        step += 1
                        total_v += self.batch_size
                        train_batch, label_batch, next_start_pos, _, _ = read_clip_and_label(
                            filename=test_list,
                            batch_size=self.batch_size,
                            num_frames_per_clip=self.CLIP_LENGTH,
                            height=self.IMG_HEIGHT,
                            width=self.IMG_WIDTH,
                            start_pos=next_start_pos,
                            shuffle=False
                        )
                        assert len(train_batch) == self.batch_size
                        train_batch = train_aug(train_batch, is_train=False, Crop_heith=self.CROP_HEIGHT,
                                                Crop_width=self.CROP_WIDTH, norm=True)
                        val_feed = {self.inputs: train_batch, self.labels: label_batch}
                        test_correct_num += sess.run(right_count, val_feed)
                        print('test acc:', test_correct_num / total_v, 'test_correct_num:', test_correct_num,
                              'total_v:', total_v)