Example #1
    def __init__(self,
                 n_uid,
                 n_mid,
                 n_cat,
                 EMBEDDING_DIM,
                 HIDDEN_SIZE,
                 ATTENTION_SIZE,
                 use_negsampling=False):
        super(Model_DIN_V2_Gru_att_Gru,
              self).__init__(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE,
                             ATTENTION_SIZE, use_negsampling)

        # RNN layer(-s)
        with tf.name_scope('rnn_1'):
            rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE),
                                         inputs=self.item_his_eb,
                                         sequence_length=self.seq_len_ph,
                                         dtype=tf.float32,
                                         scope="gru1")
            tf.summary.histogram('GRU_outputs', rnn_outputs)

        # Attention layer
        with tf.name_scope('Attention_layer_1'):
            att_outputs, alphas = din_fcn_attention(self.item_eb,
                                                    rnn_outputs,
                                                    ATTENTION_SIZE,
                                                    self.mask,
                                                    softmax_stag=1,
                                                    stag='1_1',
                                                    mode='LIST',
                                                    return_alphas=True)
            tf.summary.histogram('alpha_outputs', alphas)

        with tf.name_scope('rnn_2'):
            rnn_outputs2, final_state2 = dynamic_rnn(
                GRUCell(HIDDEN_SIZE),
                inputs=att_outputs,
                sequence_length=self.seq_len_ph,
                dtype=tf.float32,
                scope="gru2")
            tf.summary.histogram('GRU2_Final_State', final_state2)

        inp = tf.concat([
            self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum,
            self.item_eb * self.item_his_eb_sum, final_state2
        ], 1)
        # Fully connected layer
        self.build_fcn_net(inp, use_dice=True)
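
Most of the snippets on this page share one core pattern: build a GRUCell, drive it with dynamic_rnn over a padded batch, then read either the per-step outputs or the final state. Below is a minimal, self-contained sketch of that pattern against the stock TF 1.x API; the shapes and names are illustrative only, not taken from any of the repositories above.

    import numpy as np
    import tensorflow as tf

    batch_size, max_len, input_dim, hidden = 4, 10, 8, 16
    inputs = tf.placeholder(tf.float32, [None, max_len, input_dim])
    seq_len = tf.placeholder(tf.int32, [None])

    cell = tf.nn.rnn_cell.GRUCell(hidden)
    # outputs: (batch, max_len, hidden); state: (batch, hidden), i.e. the
    # hidden state at each sequence's last valid step.
    outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=seq_len,
                                       dtype=tf.float32, scope='gru_demo')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out, st = sess.run([outputs, state], feed_dict={
            inputs: np.random.randn(batch_size, max_len, input_dim).astype(np.float32),
            seq_len: [10, 7, 3, 10]})
        print(out.shape, st.shape)  # (4, 10, 16) (4, 16)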
Example #2
 def add_sentence_summaries(self, wordvector_embed_size):
     if self.config.bidirectional_sentences:
         forwardcell = DropoutWrapper(GRUCell(wordvector_embed_size), self.dropout_placeholder, self.dropout_placeholder)
         backwardcell = DropoutWrapper(GRUCell(wordvector_embed_size), self.dropout_placeholder, self.dropout_placeholder)
         _, (statefw, statebw) = bidirectional_dynamic_rnn(forwardcell, backwardcell,
                                                           self.embedded_lines, self.line_length_placeholder,
                                                           dtype=tf.float32, scope="LineRNN")
         # self.sentence_summaries = tf.concat(1, [statefw, statebw])
         self.sentence_summaries = tf.concat([statefw, statebw], 1)
         return 2*wordvector_embed_size
     else:
         rnncell = DropoutWrapper(GRUCell(wordvector_embed_size), self.dropout_placeholder, self.dropout_placeholder)
         _, self.sentence_summaries = tf.nn.dynamic_rnn(rnncell,
                                                        self.embedded_lines, self.line_length_placeholder,
                                                        dtype=tf.float32, scope="LineRNN")
         return wordvector_embed_size
Example #3
    def build_model(self):
        with tf.variable_scope("inferring_module"):
            rdim = 768
            update_num = 2
            batch_size = tf.shape(self.sent1)[0]
            dim = self.sent1.get_shape().as_list()[-1]

            sr_cell = GRUCell(num_units=rdim, activation=tf.nn.relu)

            r_cell = sr_cell

            tri_cell = TriangularCell(num_units=rdim,
                                      r_cell=r_cell,
                                      sent1=self.sent1,
                                      sent2=self.sent2,
                                      sent3=self.sent3,
                                      sent1_length=39,
                                      sent2_length=110,
                                      sent3_length=152,
                                      dim=dim,
                                      use_bias=False,
                                      activation=tf.nn.relu,
                                      sent1_mask=self.sent1_mask,
                                      sent2_mask=self.sent2_mask,
                                      sent3_mask=self.sent3_mask,
                                      initializer=None,
                                      dtype=tf.float32)

            fake_input = tf.tile(tf.expand_dims(self.mark0, axis=1),
                                 [1, update_num, 1])
            self.init_state = tri_cell.zero_state(batch_size=batch_size,
                                                  dtype=tf.float32)

            self.double_output, last_state = dynamic_rnn(
                cell=tri_cell,
                inputs=fake_input,
                initial_state=self.init_state)
            r1_output, r2_output, r3_output = last_state[3:]  # (B, dim)
        temp13 = tf.concat([r1_output, r3_output, r1_output * r3_output],
                           axis=1)
        temp23 = tf.concat([r2_output, r3_output, r2_output * r3_output],
                           axis=1)

        temp13 = dropout(temp13, self.dropout_rate)
        temp23 = dropout(temp23, self.dropout_rate)
        r13 = tf.layers.dense(temp13,
                              768,
                              activation=tf.tanh,
                              kernel_initializer=create_initializer(0.02))
        r23 = tf.layers.dense(temp23,
                              768,
                              activation=tf.tanh,
                              kernel_initializer=create_initializer(0.02))
        temp = tf.concat([self.mark0, r13, r23], axis=1)
        refer_output = tf.layers.dense(
            temp,
            768,
            activation=None,
            kernel_initializer=create_initializer(0.02))
        return refer_output
Example #4
    def __init__(self, n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=True):
        super(Model_DIN_V2_Gru_Vec_attGru_Neg, self).__init__(n_uid, n_mid, n_cat, EMBEDDING_DIM,
                                                              HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling)

        with tf.name_scope('rnn_1'):
            rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE), inputs=self.item_his_eb,
                                         sequence_length=self.seq_len_ph, dtype=tf.float32, scope='gru1')
            tf.summary.histogram("GRU_outputs", rnn_outputs)

        aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
                                         self.noclk_item_his_eb[:, 1:, :], self.mask[:, 1:], stag="gru")
        self.aux_loss = aux_loss_1
        with tf.name_scope('Attention_layer_1'):
            att_outputs, alphas = din_fcn_attention(self.item_eb, rnn_outputs, ATTENTION_SIZE, self.mask,
                                                    softmax_stag=1, stag='1_1', mode='LIST', return_alphas=True)
            tf.summary.histogram('alpha_outputs', alphas)

        with tf.name_scope('rnn_2'):
            rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs,
                                                     att_scores=tf.expand_dims(alphas, -1),
                                                     sequence_length=self.seq_len_ph, dtype=tf.float32,
                                                     scope="gru2")
            tf.summary.histogram("GRU2_Final_State", final_state2)

        inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum,
                         self.item_eb * self.item_his_eb_sum, final_state2], 1)
        self.build_fcn_net(inp, use_dice=True)
Example #5
def RNN(_X, _weights, _biases, lens):
    if FLAGS.unit == "PLSTM":
        cell = PhasedLSTMCell(FLAGS.n_hidden,
                              use_peepholes=True,
                              state_is_tuple=True)
    elif FLAGS.unit == "GRU":
        cell = GRUCell(FLAGS.n_hidden)
    elif FLAGS.unit == "LSTM":
        cell = LSTMCell(FLAGS.n_hidden,
                        use_peepholes=True,
                        state_is_tuple=True)
    else:
        raise ValueError("Unit '{}' not implemented.".format(FLAGS.unit))

    outputs = multiPLSTM(_X, lens, FLAGS.n_layers, FLAGS.n_hidden, n_input)

    outputs = tf.slice(outputs, [0, 0, 0], [-1, -1, FLAGS.n_hidden])

    # TODO better (?) in lack of smart indexing
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    out_size = int(outputs.get_shape()[2])
    index = tf.range(0, batch_size) * max_len + (lens - 1)
    flat = tf.reshape(outputs, [-1, out_size])
    relevant = tf.gather(flat, index)

    return tf.nn.bias_add(tf.matmul(relevant, _weights['out']), _biases['out'])
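
The index arithmetic before the final matmul above is a standard trick for picking each sequence's last valid output from a padded (batch, time, units) tensor. Isolated as a helper (a sketch; the names are illustrative, not from the original code):

    def last_relevant(outputs, lens):
        # outputs: (batch, max_len, units) padded RNN outputs; lens: (batch,) true lengths
        batch_size = tf.shape(outputs)[0]
        max_len = tf.shape(outputs)[1]
        units = int(outputs.get_shape()[2])
        # Row-major flattening maps step (b, t) to row b * max_len + t, so the
        # last valid step of sequence b sits at row b * max_len + (lens[b] - 1).
        flat = tf.reshape(outputs, [-1, units])
        index = tf.range(0, batch_size) * max_len + (lens - 1)
        return tf.gather(flat, index)  # (batch, units)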
Example #6
    def __init__(self, feature_size, eb_dim, hidden_size, max_time_len,
                 user_fnum, item_fnum, emb_initializer):
        super(DIEN,
              self).__init__(feature_size, eb_dim, hidden_size, max_time_len,
                             user_fnum, item_fnum, emb_initializer)
        mask = tf.sequence_mask(self.user_seq_length_ph,
                                max_time_len,
                                dtype=tf.float32)

        # attention RNN layer
        with tf.name_scope('rnn_1'):
            user_seq_ht, _ = tf.nn.dynamic_rnn(
                GRUCell(hidden_size),
                inputs=self.user_seq,
                sequence_length=self.user_seq_length_ph,
                dtype=tf.float32,
                scope='gru1')
        with tf.name_scope('attention'):
            atten_score, _ = self.attention(user_seq_ht, user_seq_ht,
                                            self.target_item, mask)
        with tf.name_scope('rnn_2'):
            _, seq_rep = dynamic_rnn(VecAttGRUCell(hidden_size),
                                     inputs=user_seq_ht,
                                     att_scores=atten_score,
                                     sequence_length=self.user_seq_length_ph,
                                     dtype=tf.float32,
                                     scope="argru1")

        inp = tf.concat([seq_rep, self.target_user, self.target_item], axis=1)

        # fully connected layer
        self.build_fc_net(inp)
        self.build_logloss()
Example #7
  def __init__(self, n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=True):
    super(Model_DIN_V2_Gru_Vec_attGru_Neg, self).__init__(n_uid, n_mid, n_cat,
                              EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
                              use_negsampling)

    # RNN layer(s): the first GRU layer feeds the item embeddings of the user's behavior history into dynamic_rnn; the auxiliary loss is computed alongside
    with tf.name_scope('rnn_1'):
      rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE), inputs=self.item_his_eb,
                     sequence_length=self.seq_len_ph, dtype=tf.float32,
                     scope="gru1")
      tf.summary.histogram('GRU_outputs', rnn_outputs)

    # The auxiliary loss is essentially a binary classification model, computed as follows:
    aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
                     self.noclk_item_his_eb[:, 1:, :],
                     self.mask[:, 1:], stag="gru")
    self.aux_loss = aux_loss_1

    # Attention layer
    with tf.name_scope('Attention_layer_1'):
      att_outputs, alphas = din_fcn_attention(self.item_eb, rnn_outputs, ATTENTION_SIZE, self.mask,
                          softmax_stag=1, stag='1_1', mode='LIST', return_alphas=True)
      tf.summary.histogram('alpha_outputs', alphas)

    with tf.name_scope('rnn_2'):
      rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs,
                           att_scores=tf.expand_dims(alphas, -1),  # models the interest evolution process
                           sequence_length=self.seq_len_ph, dtype=tf.float32,
                           scope="gru2")
      tf.summary.histogram('GRU2_Final_State', final_state2)

    inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, final_state2], 1)
    # Finally, a multi-layer network produces the final CTR estimate
    self.build_fcn_net(inp, use_dice=True)
Example #8
 def __init__(self, num_units, memory, pmemory, cell_type='lstm'):
     super(AttentionCell, self).__init__()
     self._cell = LSTMCell(num_units) if cell_type == 'lstm' else GRUCell(num_units)
     self.num_units = num_units
     self.memory = memory
     self.pmemory = pmemory
     self.mem_units = memory.get_shape().as_list()[-1]
Example #9
    def __init__(self, feature_size, eb_dim, hidden_size, max_time_len,
                 user_fnum, item_fnum, emb_initializer):
        super(HPMN,
              self).__init__(feature_size, eb_dim, hidden_size, max_time_len,
                             user_fnum, item_fnum, emb_initializer)
        self.layer_num = 3
        self.split_by = 2
        self.memory = []
        with tf.name_scope('rnn'):
            inp = self.user_seq
            length = max_time_len
            for i in range(self.layer_num):
                user_seq_ht, user_seq_final_state = tf.nn.dynamic_rnn(
                    GRUCell(hidden_size),
                    inputs=inp,
                    dtype=tf.float32,
                    scope='GRU%s' % i)

                user_seq_final_state = tf.expand_dims(user_seq_final_state, 1)
                self.memory.append(user_seq_final_state)

                # Keep every `split_by`-th hidden state: reshape the outputs into
                # groups of `split_by` steps and take the last state of each group
                # as the (downsampled) input sequence for the next layer.
                length = int(length / self.split_by)
                user_seq_ht = tf.reshape(
                    user_seq_ht, [-1, length, self.split_by, hidden_size])
                inp = tf.reshape(
                    tf.gather(user_seq_ht, [self.split_by - 1], axis=2),
                    [-1, length, hidden_size])

        self.memory = tf.concat(self.memory, axis=1)
        _, output = self.attention(self.memory, self.memory, self.target_item)
        self.repre = tf.concat([self.target_user, self.target_item, output],
                               axis=1)
        self.build_fc_net(self.repre)
        self.build_loss()
Example #10
    def __init__(self,
                 n_uid,
                 n_mid,
                 EMBEDDING_DIM,
                 HIDDEN_SIZE,
                 BATCH_SIZE,
                 SEQ_LEN=256):
        super(Model_GRU4REC, self).__init__(n_uid,
                                            n_mid,
                                            EMBEDDING_DIM,
                                            HIDDEN_SIZE,
                                            BATCH_SIZE,
                                            SEQ_LEN,
                                            Flag="GRU4REC")
        with tf.name_scope('rnn_1'):
            self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
            rnn_outputs, final_state1 = dynamic_rnn(
                GRUCell(2 * EMBEDDING_DIM),
                inputs=self.item_his_eb,
                sequence_length=self.sequence_length,
                dtype=tf.float32,
                scope="gru1")
            tf.summary.histogram('GRU_outputs', rnn_outputs)

        inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1], 1)
        self.build_fcn_net(inp, use_dice=False)
Example #11
    def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=400, use_negsample=False):
        super(Model_DIEN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 
                                           BATCH_SIZE, SEQ_LEN, use_negsample, Flag="DIEN")

        with tf.name_scope('rnn_1'):
            self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
            rnn_outputs, _ = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
                                         sequence_length=self.sequence_length, dtype=tf.float32,
                                         scope="gru1")
            tf.summary.histogram('GRU_outputs', rnn_outputs)        
        
        if use_negsample:
            aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
                                             self.neg_his_eb[:, 1:, :], self.mask[:, 1:], stag = "bigru_0")
            self.aux_loss = aux_loss_1

        # Attention layer
        with tf.name_scope('Attention_layer_1'):
            att_outputs, alphas = din_attention(self.item_eb, rnn_outputs, HIDDEN_SIZE, mask=self.mask, mode="LIST", return_alphas=True)
            tf.summary.histogram('alpha_outputs', alphas)

        with tf.name_scope('rnn_2'):
            rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs,
                                                     att_scores = tf.expand_dims(alphas, -1),
                                                     sequence_length=self.sequence_length, dtype=tf.float32,
                                                     scope="gru2")
            tf.summary.histogram('GRU2_Final_State', final_state2)

        inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum], 1)
        self.build_fcn_net(inp, use_dice=False)
Example #12
    def __init__(self,
                 n_uid,
                 n_mid,
                 EMBEDDING_DIM,
                 HIDDEN_SIZE,
                 BATCH_SIZE,
                 SEQ_LEN=256):
        super(Model_ARNN, self).__init__(n_uid,
                                         n_mid,
                                         EMBEDDING_DIM,
                                         HIDDEN_SIZE,
                                         BATCH_SIZE,
                                         SEQ_LEN,
                                         Flag="ARNN")
        with tf.name_scope('rnn_1'):
            self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
            rnn_outputs, final_state1 = dynamic_rnn(
                GRUCell(2 * EMBEDDING_DIM),
                inputs=self.item_his_eb,
                sequence_length=self.sequence_length,
                dtype=tf.float32,
                scope="gru1")
            tf.summary.histogram('GRU_outputs', rnn_outputs)
        # Attention layer
        with tf.name_scope('Attention_layer_1'):
            att_gru = din_attention(self.item_eb, rnn_outputs, HIDDEN_SIZE,
                                    self.mask)
            att_gru = tf.reduce_sum(att_gru, 1)

        inp = tf.concat(
            [self.item_eb, self.item_his_eb_sum, final_state1, att_gru], -1)
        self.build_fcn_net(inp, use_dice=False)
Example #13
    def build_tf_net(self, datas, is_train=True):

        super(Model_DIEN,self).build_tf_net(datas, is_train)

        # RNN layer(-s)
        # GRU of interest extractor layer
        with tf.name_scope('rnn_1'):
            rnn_outputs, _ = dynamic_rnn(
                GRUCell(self.hidden_size, kernel_initializer=get_tf_initializer()),
                inputs=self.item_his_eb, sequence_length=self.tensors.seq_len,
                dtype=tf.float32, scope="gru1")

        aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
                                         self.noclk_item_his_eb[:, 1:, :],
                                         self.tensors.mask[:, 1:], stag="gru")
        self.aux_loss = aux_loss_1

        # Attention layer
        # Attention of interest evolving layer
        with tf.name_scope('Attention_layer_1'):
            att_outputs, alphas = din_fcn_attention(self.item_eb, rnn_outputs, self.attention_size, self.tensors.mask,
                                                    softmax_stag=1, stag='1_1', mode='LIST', return_alphas=True)

        # AUGRU of interest evolving layer
        with tf.name_scope('rnn_2'):
            rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(self.hidden_size, kernel_initializer=get_tf_initializer()), inputs=rnn_outputs,
                                                     att_scores=tf.expand_dims(
                                                         alphas, -1),
                                                     sequence_length=self.tensors.seq_len, dtype=tf.float32,
                                                     scope="gru2")

        inp = tf.concat([self.tensors.uid, self.item_eb, final_state2, self.item_his_eb_sum], 1)

        # after concat and flatten(?), put into Model.fcn
        self.build_fcn_net(inp, use_dice=True)
Example #14
	def __init__(self, batch, hidden, keep_prob=1.0, is_train=None, scope="ptr_net"):
		self.gru = GRUCell(hidden)
		self.batch = batch
		self.scope = scope
		self.keep_prob = keep_prob
		self.is_train = is_train
		self.dropout_mask = dropout(tf.ones(
			[batch, hidden], dtype=tf.float32), keep_prob=keep_prob, is_train=is_train)
Example #15
 def _create_rnn_cell(self):
     cell = GRUCell(
         self.cfg.num_units) if self.cfg.cell_type == "gru" else LSTMCell(
             self.cfg.num_units)
     if self.cfg.use_dropout:
         cell = DropoutWrapper(cell, output_keep_prob=self.keep_prob)
     if self.cfg.use_residual:
         cell = ResidualWrapper(cell)
     return cell
Example #16
 def _create_single_rnn_cell(self, num_units):
     cell = GRUCell(
         num_units) if self.cfg["cell_type"] == "gru" else LSTMCell(
             num_units)
     if self.cfg["use_dropout"]:
         cell = DropoutWrapper(cell, output_keep_prob=self.rnn_keep_prob)
     if self.cfg["use_residual"]:
         cell = ResidualWrapper(cell)
     return cell
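
A factory like this is typically invoked once per layer and the resulting cells stacked; a hypothetical usage sketch (num_layers, inputs, and seq_len are assumptions, not part of the original class):

    cells = [self._create_single_rnn_cell(num_units) for _ in range(num_layers)]
    multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    outputs, state = tf.nn.dynamic_rnn(multi_cell, inputs,
                                       sequence_length=seq_len, dtype=tf.float32)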
Example #17
 def __init__(self,
              num_layers,
              num_units,
              batch_size,
              input_size,
              keep_prob=1.0,
              is_train=None,
              scope="native_gru",
              activation=tf.nn.tanh):
     self.num_layers = num_layers
     self.grus = []
     self.inits = []
     self.dropout_mask = []
     self.scope = scope
     for layer in range(num_layers):
         input_size_ = input_size if layer == 0 else 2 * num_units
         gru_fw = GRUCell(num_units, activation=activation)
         gru_bw = GRUCell(num_units, activation=activation)
         init_fw = tf.tile(tf.Variable(tf.zeros([1, num_units])),
                           [batch_size, 1])
         init_bw = tf.tile(tf.Variable(tf.zeros([1, num_units])),
                           [batch_size, 1])
         mask_fw = Dropout(tf.ones([batch_size, 1, input_size_],
                                   dtype=tf.float32),
                           keep_prob=keep_prob,
                           is_train=is_train,
                           mode='')
         mask_bw = Dropout(tf.ones([batch_size, 1, input_size_],
                                   dtype=tf.float32),
                           keep_prob=keep_prob,
                           is_train=is_train,
                           mode='')
         self.grus.append((
             gru_fw,
             gru_bw,
         ))
         self.inits.append((
             init_fw,
             init_bw,
         ))
         self.dropout_mask.append((
             mask_fw,
             mask_bw,
         ))
Example #18
    def __call__(self, features, labels):
        super(Model_DIN_V2_Gru_Vec_attGru_Neg, self).__call__(features, labels)

        def dtype_getter(getter, name, dtype=None, *args, **kwargs):
            var = getter(name, dtype=self.model_dtype, *args, **kwargs)
            return var

        with tf.variable_scope("dien",
                               custom_getter=dtype_getter,
                               dtype=self.model_dtype):
            # RNN layer(-s)
            with tf.name_scope('rnn_1'):
                res_1 = GRUCell(self.HIDDEN_SIZE)
                #res_2 = CudnnGRU(self.HIDDEN_SIZE)
                rnn_outputs, _ = dynamic_rnn(res_1,
                                             inputs=self.item_his_eb,
                                             sequence_length=self.seq_len_ph,
                                             dtype=self.model_dtype,
                                             scope="gru1")
                tf.summary.histogram('GRU_outputs', rnn_outputs)

            aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :],
                                             self.item_his_eb[:, 1:, :],
                                             self.noclk_item_his_eb[:, 1:, :],
                                             self.mask[:, 1:],
                                             stag="gru")
            self.aux_loss = aux_loss_1

            # Attention layer
            with tf.name_scope('Attention_layer_1'):
                att_outputs, alphas = din_fcn_attention(self.item_eb,
                                                        rnn_outputs,
                                                        self.ATTENTION_SIZE,
                                                        self.mask,
                                                        softmax_stag=1,
                                                        stag='1_1',
                                                        mode='LIST',
                                                        return_alphas=True,
                                                        forCnn=True)
                tf.summary.histogram('alpha_outputs', alphas)

            with tf.name_scope('rnn_2'):
                rnn_outputs2, final_state2 = dynamic_rnn(
                    VecAttGRUCell(self.HIDDEN_SIZE),
                    inputs=rnn_outputs,
                    att_scores=tf.expand_dims(alphas, -1),
                    sequence_length=self.seq_len_ph,
                    dtype=self.model_dtype,
                    scope="gru2")
                tf.summary.histogram('GRU2_Final_State', final_state2)

            inp = tf.concat([
                self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum,
                self.item_eb * self.item_his_eb_sum, final_state2
            ], 1)
            self.build_fcn_net(inp, use_dice=True)
Example #19
 def add_conversational_context(self, sentence_summary_size):
     line_vectors_as_timesteps = tf.expand_dims(self.sentence_summaries, 0)
     if self.config.bidirectional_conversations:
         forwardcell = DropoutWrapper(GRUCell(sentence_summary_size), self.dropout_placeholder, self.dropout_placeholder)
         backwardcell = DropoutWrapper(GRUCell(sentence_summary_size), self.dropout_placeholder, self.dropout_placeholder)
         outputs, (sf, sb) = bidirectional_dynamic_rnn(forwardcell, backwardcell,
                                                       line_vectors_as_timesteps,
                                                       tf.slice(tf.shape(line_vectors_as_timesteps), [1], [1]),  # sequence length = number of lines (the time dimension)
                                                       dtype=tf.float32, scope="ChapterRNN")
         self.conversation_state = tf.squeeze(outputs)
         return 2*sentence_summary_size
     else:
         rnncell = DropoutWrapper(GRUCell(sentence_summary_size), self.dropout_placeholder, self.dropout_placeholder)
         outputs, state = tf.nn.dynamic_rnn(rnncell,
                                            line_vectors_as_timesteps,
                                            tf.slice(tf.shape(line_vectors_as_timesteps), [1], [1]),  # sequence length = number of lines (the time dimension)
                                            dtype=tf.float32, scope="ChapterRNN")
         self.conversation_state = tf.squeeze(outputs)
         return sentence_summary_size
Example #20
    def encoder_impl(self, encoder_input, is_training):
        dropout_rate = self._config.dropout_rate if is_training else 0.0

        # Mask
        encoder_mask = tf.to_int32(tf.not_equal(encoder_input, 0))
        sequence_lengths = tf.reduce_sum(encoder_mask, axis=1)

        # Embedding
        encoder_output = embedding(encoder_input,
                                   vocab_size=self._config.src_vocab_size,
                                   dense_size=self._config.hidden_units,
                                   kernel=self._src_embedding,
                                   multiplier=self._config.hidden_units**0.5
                                   if self._config.scale_embedding else 1.0,
                                   name="src_embedding")

        # Dropout
        encoder_output = tf.layers.dropout(encoder_output,
                                           rate=dropout_rate,
                                           training=is_training)

        cell_fw = GRUCell(num_units=self._config.hidden_units, name='fw_cell')
        cell_bw = GRUCell(num_units=self._config.hidden_units, name='bw_cell')

        # RNN
        encoder_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=encoder_output,
            sequence_length=sequence_lengths,
            dtype=tf.float32)

        encoder_output = tf.concat(encoder_outputs, axis=2)

        # Dropout
        encoder_output = tf.layers.dropout(encoder_output,
                                           rate=dropout_rate,
                                           training=is_training)

        # Mask
        encoder_output *= tf.expand_dims(tf.to_float(encoder_mask), axis=-1)

        return encoder_output
Example #21
    def build_model(self):
        with tf.variable_scope("inferring_module"):
            rdim = 768
            update_num = self.update_num
            batch_size = tf.shape(self.sent1)[0]
            dim = self.sent1.get_shape().as_list()[-1]

            # gru_layer = BiGRU(num_layers=1, num_units=rdim, batch_size=batch_size,
            #                   input_size=dim, keep_prob=0.9, is_train=self.is_training,
            #                   activation=tf.nn.tanh)
            # sent1_len = tf.cast(tf.reduce_sum(self.sent1_mask, axis=1), tf.int32)
            # sent2_len = tf.cast(tf.reduce_sum(self.sent2_mask, axis=1), tf.int32)
            # self.sent1 = gru_layer(self.sent1, sent1_len)
            # self.sent2 = gru_layer(self.sent2, sent2_len)

            sr_cell = GRUCell(num_units=2 * rdim, activation=tf.nn.relu)

            r_cell = sr_cell

            tri_cell = DoubleJointCell(num_units=2 * rdim,
                                       r_cell=r_cell,
                                       sent1=self.sent1,
                                       sent2=self.sent2,
                                       dim=dim,
                                       update_num=update_num,
                                       use_bias=False,
                                       activation=tf.tanh,
                                       dropout_rate=self.dropout_rate,
                                       sent1_mask=self.sent1_mask,
                                       sent2_mask=self.sent2_mask,
                                       initializer=None,
                                       dtype=tf.float32)

            fake_input = tf.tile(tf.expand_dims(self.mark0, axis=1),
                                 [1, update_num, 1])
            self.init_state = tri_cell.zero_state(batch_size=batch_size,
                                                  dtype=tf.float32)

            self.double_output, last_state = dynamic_rnn(
                cell=tri_cell,
                inputs=fake_input,
                initial_state=self.init_state)
            refer_output = tf.reduce_mean(self.double_output,
                                          axis=1)  # (B, dim)
        # temp = tf.concat([refer_output, self.mark0], axis=1)
        #
        # temp = dropout(temp, self.dropout_rate)
        refer_output = tf.layers.dense(
            refer_output,
            768,
            activation=tf.nn.tanh,
            kernel_initializer=create_initializer(0.02))

        # return refer_output * (1 - gate) + gate * self.mark0
        return refer_output + self.mark0
Example #22
 def __init__(self,
              num_layers,
              num_units,
              cell_type='lstm',
              scope='stack_bi_rnn'):
     if type(num_units) == list:
         assert len(
             num_units
         ) == num_layers, "if num_units is a list, then its size should equal to num_layers"
         self.cells_fw = [LSTMCell(num_units[i]) for i in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units[i]) for i in range(num_layers)]
         self.cells_bw = [LSTMCell(num_units[i]) for i in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units[i]) for i in range(num_layers)]
     else:
         self.cells_fw = [LSTMCell(num_units) for _ in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units) for _ in range(num_layers)]
         self.cells_bw = [LSTMCell(num_units) for _ in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units) for _ in range(num_layers)]
     self.num_layers = num_layers
     self.scope = scope
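
The per-layer cells_fw/cells_bw lists built here match the signature of tf.contrib.rnn.stack_bidirectional_dynamic_rnn in TF 1.x. A hedged usage sketch, where inputs and seq_len stand in for whatever tensors the caller provides:

    outputs, states_fw, states_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        self.cells_fw, self.cells_bw, inputs=inputs,
        sequence_length=seq_len, dtype=tf.float32, scope=self.scope)
    # outputs: (batch, time, 2 * num_units), the fw and bw outputs of the top layer concatenated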
Example #23
    def build_graph(self):
        # RNN layer(-s)
        with tf.name_scope('rnn_1'):
            rnn_outputs, _ = dynamic_rnn(
                GRUCell(self.HIDDEN_SIZE),
                inputs=self.item_his_eb,
                max_iteration=self.options.max_rnn_while_loops,
                sequence_length=self.seq_len_ph,
                dtype=tf.float32,
                scope="gru1")
            tf.summary.histogram('GRU_outputs', rnn_outputs)

        # Attention layer
        with tf.name_scope('Attention_layer_1'):
            att_outputs, alphas = din_fcn_attention(self.item_eb,
                                                    rnn_outputs,
                                                    self.ATTENTION_SIZE,
                                                    self.mask,
                                                    softmax_stag=1,
                                                    stag='1_1',
                                                    mode='LIST',
                                                    return_alphas=True)
            tf.summary.histogram('alpha_outputs', alphas)

        with tf.name_scope('rnn_2'):
            rnn_outputs2, final_state2 = dynamic_rnn(
                GRUCell(self.HIDDEN_SIZE),
                inputs=att_outputs,
                max_iteration=self.options.max_rnn_while_loops,
                sequence_length=self.seq_len_ph,
                dtype=tf.float32,
                scope="gru2")
            tf.summary.histogram('GRU2_Final_State', final_state2)

        inp = tf.concat([
            self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum,
            self.item_eb * self.item_his_eb_sum, final_state2
        ], 1)
        # Fully connected layer
        self.build_fcn_net(inp, use_dice=True)
Example #24
    def __init__(self, config):
        self.config = config

        self.input = tf.placeholder(
            'int32', [self.config.batch_size, config.max_seq_len],
            name='input')
        self.labels = tf.placeholder('int64', [self.config.batch_size],
                                     name='labels')
        self.labels_one_hot = tf.one_hot(indices=self.labels,
                                         depth=config.output_dim,
                                         on_value=1.0,
                                         off_value=0.0,
                                         axis=-1)

        self.gru = GRUCell(config.hidden_state_dim)

        embeddings_we = tf.get_variable(
            'word_embeddings',
            initializer=tf.random_uniform(
                [config.vocab_size, config.embedding_dim], -1.0, 1.0))
        self.emb = embed_input = tf.nn.embedding_lookup(
            embeddings_we, self.input)
        inputs = [
            tf.squeeze(i, squeeze_dims=[1])
            for i in tf.split(1, config.max_seq_len, embed_input)
        ]

        outputs, last_slu_state = tf.nn.rnn(
            cell=self.gru,
            inputs=inputs,
            dtype=tf.float32,
        )

        w_project = tf.get_variable(
            'project2labels',
            initializer=tf.random_uniform(
                [config.hidden_state_dim, config.output_dim], -1.0, 1.0))
        self.logits = logits_bo = tf.matmul(last_slu_state, w_project)
        tf.histogram_summary('logits', logits_bo)
        self.probabilities = tf.nn.softmax(logits_bo)
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits_bo,
                                                    self.labels_one_hot))
        self.predict = tf.nn.softmax(logits_bo)

        # TensorBoard
        self.accuracy = tf.reduce_mean(tf.cast(
            tf.equal(tf.argmax(self.predict, 1), self.labels), 'float32'),
                                       name='accuracy')
        tf.scalar_summary('CCE loss', self.loss)
        tf.scalar_summary('Accuracy', self.accuracy)
        self.tb_info = tf.merge_all_summaries()
Example #25
    def createGraph(self):

        self.input = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='inputs')
        self.targs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                    name='targets')
        onehot = tf.one_hot(self.input, self.vocab_size, name='input_oh')

        inputs = tf.split(onehot, self.seq_len, 1)
        inputs = [tf.squeeze(i, [1]) for i in inputs]
        targets = tf.split(self.targs, self.seq_len, 1)

        with tf.variable_scope("posRNN"):

            cells = [GRUCell(self.num_hidden) for _ in range(self.num_layers)]

            stacked = MultiRNNCell(cells, state_is_tuple=True)
            self.zero_state = stacked.zero_state(self.batch_size, tf.float32)

            outputs, self.last_state = seq2seq.rnn_decoder(
                inputs, self.zero_state, stacked)

            w = tf.get_variable(
                "w", [self.num_hidden, self.vocab_size],
                tf.float32,
                initializer=tf.random_normal_initializer(stddev=0.02))
            b = tf.get_variable("b", [self.vocab_size],
                                initializer=tf.constant_initializer(0.0))
            logits = [tf.matmul(o, w) + b for o in outputs]

            const_weights = [
                tf.ones([self.batch_size]) for _ in range(self.seq_len)
            ]
            self.loss = seq2seq.sequence_loss(logits, targets, const_weights)

            self.opt = tf.train.AdamOptimizer(0.001,
                                              beta1=0.5).minimize(self.loss)

        with tf.variable_scope("posRNN", reuse=True):

            batch_size = 1
            self.s_inputs = tf.placeholder(tf.int32, [batch_size],
                                           name='s_inputs')
            s_onehot = tf.one_hot(self.s_inputs,
                                  self.vocab_size,
                                  name='s_input_oh')

            self.s_zero_state = stacked.zero_state(batch_size, tf.float32)
            s_outputs, self.s_last_state = seq2seq.rnn_decoder(
                [s_onehot], self.s_zero_state, stacked)
            s_outputs = tf.reshape(s_outputs, [1, self.num_hidden])
            self.s_probs = tf.nn.softmax(tf.matmul(s_outputs, w) + b)
Example #26
    def __init__(self,
                 n_uid,
                 n_mid,
                 n_cat,
                 EMBEDDING_DIM,
                 HIDDEN_SIZE,
                 ATTENTION_SIZE,
                 use_negsampling=False,
                 use_others=False):
        super(Model_DIN_V2_Gru_Vec_attGru,
              self).__init__(n_uid, n_mid, n_cat, EMBEDDING_DIM, HIDDEN_SIZE,
                             ATTENTION_SIZE, use_negsampling, use_others)
        with self.graph.as_default():
            # RNN layer(-s)
            with tf.name_scope('rnn_1'):
                rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE),
                                             inputs=self.item_his_eb,
                                             sequence_length=self.seq_len_ph,
                                             dtype=tf.float32,
                                             scope="gru1")
                tf.summary.histogram('GRU_outputs', rnn_outputs)

            # Attention layer
            with tf.name_scope('Attention_layer_1'):
                att_outputs, alphas = din_fcn_attention(self.item_eb,
                                                        rnn_outputs,
                                                        ATTENTION_SIZE,
                                                        self.mask,
                                                        softmax_stag=1,
                                                        stag='1_1',
                                                        mode='LIST',
                                                        return_alphas=True)
                tf.summary.histogram('alpha_outputs', alphas)

            with tf.name_scope('rnn_2'):
                rnn_outputs2, final_state2 = dynamic_rnn(
                    VecAttGRUCell(HIDDEN_SIZE),
                    inputs=rnn_outputs,
                    att_scores=tf.expand_dims(alphas, -1),
                    sequence_length=self.seq_len_ph,
                    dtype=tf.float32,
                    scope="gru2")
                tf.summary.histogram('GRU2_Final_State', final_state2)

            # inp = tf.concat([self.uid_batch_embedded, self.item_eb, final_state2, self.item_his_eb_sum], 1)
            inp = tf.concat([
                self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum,
                self.item_eb * self.item_his_eb_sum, final_state2
            ], 1)
            if self.use_others:
                inp = tf.concat([inp] + list(self.other_inputs()), 1)
        self.build_fcn_net(inp, use_dice=True)
Example #27
    def __init__(self, hidden_num, cell=None, reverse=True, decode_without_input=False):
        if cell is None:
            self._enc_cell = GRUCell(hidden_num, name='encoder_cell')
            self._dec_cell = GRUCell(hidden_num, name='decoder_cell')
        else:
            self._enc_cell = cell
            self._dec_cell = cell
        self.reverse = reverse
        self.decode_without_input = decode_without_input
        self.hidden_num = hidden_num

        if FLAGS.datasource in ['sinusoid', 'mixture']:
            self.elem_num_init = 2
            self.elem_num = 20

        elif FLAGS.datasource in ['miniimagenet', 'omniglot', 'multidataset']:
            self.elem_num = FLAGS.num_classes + 64

        self.dec_weight = tf.Variable(tf.truncated_normal([self.hidden_num,
                                                           self.elem_num], dtype=tf.float32), name='dec_weight')
        self.dec_bias = tf.Variable(tf.constant(0.1, shape=[self.elem_num],
                                                dtype=tf.float32), name='dec_bias')
Example #28
 def prediction(self):
     # Recurrent network: build a fresh dropout-wrapped GRUCell per layer
     # (reusing a single cell object across layers would tie their weights).
     cells = [DropoutWrapper(GRUCell(self._num_hidden), output_keep_prob=self.dropout)
              for _ in range(self._num_layers)]
     network = MultiRNNCell(cells)
     output, _ = tf.nn.dynamic_rnn(network, self.data, dtype=tf.float32)
     # Select last output.
     output = tf.transpose(output, [1, 0, 2])
     last = tf.gather(output, int(output.get_shape()[0]) - 1)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Example #29
    def __init__(self, hidden_num, input_num, cell=None, reverse=True, decode_without_input=False, name=None):
        self.name = name
        if cell is None:
            self._enc_cell = GRUCell(hidden_num, name='encoder_cell_{}'.format(self.name))
            self._dec_cell = GRUCell(hidden_num, name='decoder_cell_{}'.format(self.name))
        else:
            self._enc_cell = cell
            self._dec_cell = cell
        self.reverse = reverse
        self.decode_without_input = decode_without_input
        self.hidden_num = hidden_num

        if FLAGS.datasource in ['2D']:
            self.elem_num_init = 2
            self.elem_num = FLAGS.sync_filters

        elif FLAGS.datasource in ['plainmulti', 'artmulti']:
            self.elem_num = input_num

        self.dec_weight = tf.Variable(tf.truncated_normal([self.hidden_num,
                                                           self.elem_num], dtype=tf.float32), name='dec_weight_{}'.format(self.name))
        self.dec_bias = tf.Variable(tf.constant(0.1, shape=[self.elem_num],
                                                dtype=tf.float32), name='dec_bias_{}'.format(self.name))
Example #30
    def __init__(self, feature_size, eb_dim, hidden_size, max_len_item, max_len_user, item_part_fnum, user_part_fnum, use_hist_u, use_hist_i, emb_initializer):
        super(GRU4Rec, self).__init__(feature_size, eb_dim, hidden_size, max_len_item, max_len_user, item_part_fnum, user_part_fnum, use_hist_u, use_hist_i, emb_initializer)

        # RNN layer
        with tf.name_scope('item_rnn'):
            _, item_part_final_state = tf.nn.dynamic_rnn(GRUCell(hidden_size), inputs=self.item_part_emb,
                                        sequence_length=self.item_len_ph, dtype=tf.float32, scope='gru1')
        item_part = item_part_final_state
        
        with tf.name_scope('user_rnn'):
            _, user_part_final_state = tf.nn.dynamic_rnn(GRUCell(hidden_size), inputs=self.user_part_emb,
                                        sequence_length=self.user_len_ph, dtype=tf.float32, scope='gru2')
        user_part = user_part_final_state
        
        if use_hist_i and use_hist_u:
            inp = tf.concat([item_part, user_part], axis=1)
        elif use_hist_i and not use_hist_u:
            inp = item_part
        elif not use_hist_i and use_hist_u:
            inp = user_part
        else:
            raise ValueError('at least one of use_hist_i / use_hist_u must be set')
        
        # fully connected layer
        self.build_fc_net(inp)
        self.build_loss()
Example #31
def main(c):
    ''' params:
            c: config dictionary
    '''

    # Data ---------------------------------------------------------------------------------------------------
    data_portion = None  # 2 * batch_size
    train_set = Dstc2('data/dstc2/data.dstc2.train.json', sample_unk=0.01, first_n=data_portion)
    valid_set = Dstc2('data/dstc2/data.dstc2.dev.json', first_n=data_portion, sample_unk=0,
                      max_dial_len=train_set.max_dial_len, words_vocab=train_set.words_vocab,
                      labels_vocab=train_set.labels_vocab, labels_vocab_separate=train_set.labels_vocab_separate)
    test_set = Dstc2('data/dstc2/data.dstc2.test.json', first_n=data_portion, sample_unk=0,
                     max_dial_len=train_set.max_dial_len, words_vocab=train_set.words_vocab,
                     labels_vocab=train_set.labels_vocab, labels_vocab_separate=train_set.labels_vocab_separate)

    stats(train_set, valid_set, test_set)

    vocab_size = len(train_set.words_vocab)
    output_dim = max(np.unique(train_set.labels)) + 1
    n_train_batches = len(train_set.dialogs) // c.batch_size

    # output dimensions for each separate label
    output_dims = []
    for i in range(3):
        o_d = max(np.unique(train_set.labels_separate[:,:,i])) + 1
        output_dims.append(o_d)


    # Model -----------------------------------------------------------------------------------------------------
    logging.info('Creating model')
    input_bt = tf.placeholder('int32', [c.batch_size, train_set.max_turn_len], name='input')
    turn_lens_b = tf.placeholder('int32', [c.batch_size], name='turn_lens')
    mask_b = tf.placeholder('int32', [c.batch_size], name='dial_mask')
    # labels_b = tf.placeholder('int64', [c.batch_size], name='labels')
    # onehot_labels_bo = tf.one_hot(indices=labels_b,
    #                               depth=output_dim,
    #                               on_value=1.0,
    #                               off_value=0.0,
    #                               axis=-1)

    # separate labels and their onehots
    labels0_b, onehot_labels0_bo0 = get_labels_with_onehot(c.batch_size, output_dims[0], 'labels0')
    labels1_b, onehot_labels1_bo1 = get_labels_with_onehot(c.batch_size, output_dims[1], 'labels1')
    labels2_b, onehot_labels2_bo2 = get_labels_with_onehot(c.batch_size, output_dims[2], 'labels2')

    is_first_turn = tf.placeholder(tf.bool)
    gru = GRUCell(c.hidden_state_dim)

    embeddings_we = tf.get_variable('word_embeddings',
                                    initializer=tf.random_uniform([vocab_size, c.embedding_dim], -1.0, 1.0))
    embedded_input_bte = tf.nn.embedding_lookup(embeddings_we, input_bt)
    dialog_state_before_turn = tf.get_variable('dialog_state_before_turn',
                                               initializer=tf.zeros([c.batch_size, c.hidden_state_dim], dtype='float32'),
                                               trainable=False)

    before_state_bh = cond(is_first_turn,
                           lambda: gru.zero_state(c.batch_size, dtype='float32'),
                           lambda: dialog_state_before_turn)

    inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, train_set.max_turn_len, embedded_input_bte)]

    outputs, state_bh = tf.nn.rnn(cell=gru,
                                  inputs=inputs,
                                  initial_state=before_state_bh,
                                  sequence_length=turn_lens_b,
                                  dtype=tf.float32)

    # Note: assign() only builds an op here; the new state is persisted only if
    # this op is fetched in sess.run().
    dialog_state_before_turn.assign(state_bh)


    # projection_ho = tf.get_variable('project2labels',
    #                                 initializer=tf.random_uniform([c.hidden_state_dim, output_dim], -1.0, 1.0))



    # logits_bo = tf.matmul(state_bh, projection_ho)
    # tf.histogram_summary('logits', logits_bo)

    # probabilities_bo = tf.nn.softmax(logits_bo)
    # tf.histogram_summary('probabilities', probabilities_bo)


    # logits and probabilites and predictions from hidden state
    logits_bo0, probabilities_bo0, predict_b0 = get_logits_and_probabilities(state_bh, c.hidden_state_dim, output_dims[0], 'labels0')
    logits_bo1, probabilities_bo1, predict_b1 = get_logits_and_probabilities(state_bh, c.hidden_state_dim, output_dims[1], 'labels1')
    logits_bo2, probabilities_bo2, predict_b2 = get_logits_and_probabilities(state_bh, c.hidden_state_dim, output_dims[2], 'labels2')



    float_mask_b = tf.cast(mask_b, 'float32')
    
    # loss = tf.reduce_sum(tf.mul(float_mask_b, x_entropy(logits_bo, onehot_labels_bo))) / tf.reduce_sum(float_mask_b)
    # tf.scalar_summary('CCE loss', loss)

    # losses
    loss_0 = tf.reduce_sum(tf.mul(float_mask_b, x_entropy(logits_bo0, onehot_labels0_bo0))) / tf.reduce_sum(float_mask_b)
    loss_1 = tf.reduce_sum(tf.mul(float_mask_b, x_entropy(logits_bo1, onehot_labels1_bo1))) / tf.reduce_sum(float_mask_b)
    loss_2 = tf.reduce_sum(tf.mul(float_mask_b, x_entropy(logits_bo2, onehot_labels2_bo2))) / tf.reduce_sum(float_mask_b)
    loss = loss_0 + loss_1 + loss_2
    tf.scalar_summary('CCE loss', loss)


    # predict_b = tf.argmax(logits_bo, 1)
    # correct = tf.cast(tf.equal(predict_b, labels_b), 'float32')
    # accuracy = tf.reduce_sum(tf.mul(correct, float_mask_b)) / tf.reduce_sum(float_mask_b)
    # tf.scalar_summary('Accuracy', accuracy)


    # correct
    correct_0 = tf.cast(tf.equal(predict_b0, labels0_b), 'float32')
    correct_1 = tf.cast(tf.equal(predict_b1, labels1_b), 'float32')
    correct_2 = tf.cast(tf.equal(predict_b2, labels2_b), 'float32')
    correct_all = tf.mul(tf.mul(correct_0, correct_1), correct_2)

    # accuracies
    accuracy_0 = get_accuracy(correct_0, float_mask_b)
    accuracy_1 = get_accuracy(correct_1, float_mask_b)
    accuracy_2 = get_accuracy(correct_2, float_mask_b)
    accuracy_all = get_accuracy(correct_all, float_mask_b)
    tf.scalar_summary('Accuracy all', accuracy_all)    
    tf.scalar_summary('Accuracy label 0', accuracy_0)
    tf.scalar_summary('Accuracy label 1', accuracy_1)
    tf.scalar_summary('Accuracy label 2', accuracy_2)



    tb_info = tf.merge_all_summaries()

    # Optimizer  -----------------------------------------------------------------------------------------------------
    logging.info('Creating optimizer')
    optimizer = tf.train.AdamOptimizer(c.learning_rate)
    logging.info('Creating train_op')
    train_op = optimizer.minimize(loss)

    # Session  -----------------------------------------------------------------------------------------------------
    logging.info('Creating session')
    sess = tf.Session()
    logging.info('Initing variables')
    init = tf.initialize_all_variables()
    logging.info('Running session')
    sess.run(init)

    # TB ---------------------------------------------------------------------------------------------------------
    logging.info('See stats via tensorboard: $ tensorboard --logdir %s', c.log_dir)
    train_writer = tf.train.SummaryWriter(c.log_dir, sess.graph)

    # Train ---------------------------------------------------------------------------------------------------------
    train_summary = None
    step, stopper = 0, EarlyStopper(c.nbest_models, c.not_change_limit, c.name)
    try:
        for e in range(c.epochs):
            logging.info('------------------------------')
            logging.info('Epoch %d', e)

            total_loss = 0
            total_acc = 0
            batch_count = 0
            for bid, (dialogs_bTt, lengths_bT, labels0_bT, labels1_bT, labels2_bT, masks_bT) in enumerate(next_batch(train_set, c.batch_size)):
                turn_loss = 0
                turn_acc = 0
                n_turns = 0
                first_run = True
                for (turn_bt, label0_b, label1_b, label2_b, lengths_b, masks_b) in zip(dialogs_bTt.transpose([1, 0, 2]),
                                                                  labels0_bT.transpose([1, 0]),
                                                                  labels1_bT.transpose([1, 0]),
                                                                  labels2_bT.transpose([1, 0]),
                                                                  lengths_bT.transpose([1, 0]),
                                                                  masks_bT.transpose([1,0])):
                    if sum(masks_b) == 0:
                        break 

                    _, batch_loss, batch_accuracy, train_summary = sess.run([train_op, loss, accuracy_all, tb_info],
                                                                            feed_dict={input_bt: turn_bt,
                                                                                       turn_lens_b: lengths_b,
                                                                                       mask_b: masks_b,
                                                                                       labels0_b: label0_b,
                                                                                       labels1_b: label1_b,
                                                                                       labels2_b: label2_b,
                                                                                       is_first_turn: first_run})
                    first_run = False
                    turn_loss += batch_loss
                    turn_acc += batch_accuracy
                    n_turns += 1
                    step += 1

                total_loss += turn_loss / n_turns
                total_acc += turn_acc / n_turns
                batch_count += 1

                logging.info('Batch %d/%d\r', bid, n_train_batches)

            train_writer.add_summary(train_summary, e)
            logging.info('Train cost %f', total_loss / batch_count)
            logging.info('Train accuracy: %f', total_acc / batch_count)

            def monitor_stream(work_set, name):
                total_loss = 0
                total_acc = 0
                n_valid_batches = 0
                for bid, (dialogs_bTt, lengths_bT, labels0_bT, labels1_bT, labels2_bT, masks_bT) in enumerate(next_batch(work_set, c.batch_size)):
                    turn_loss = 0
                    turn_acc = 0
                    n_turns = 0
                    first_run = True
                    for (turn_bt, label0_b, label1_b, label2_b, lengths_b, masks_b) in zip(dialogs_bTt.transpose([1, 0, 2]),
                                                                  labels0_bT.transpose([1, 0]),
                                                                  labels1_bT.transpose([1, 0]),
                                                                  labels2_bT.transpose([1, 0]),
                                                                  lengths_bT.transpose([1, 0]),
                                                                  masks_bT.transpose([1,0])):
                        if sum(masks_b) == 0:
                            break

                        # Pad the turn to the training set's maximum turn length so it
                        # fits the fixed-size input placeholder (avoids shadowing the
                        # `input` builtin as well).
                        padded_bt = np.pad(turn_bt, ((0, 0), (0, train_set.max_turn_len - turn_bt.shape[1])),
                                           'constant', constant_values=0) \
                            if train_set.max_turn_len > turn_bt.shape[1] else turn_bt

                        batch_loss, batch_acc, valid_summary = sess.run([loss, accuracy_all, tb_info],
                                                                        feed_dict={input_bt: padded_bt,
                                                                                   turn_lens_b: lengths_b,
                                                                                   labels0_b: label0_b,
                                                                                   labels1_b: label1_b,
                                                                                   labels2_b: label2_b,
                                                                                   mask_b: masks_b,
                                                                                   is_first_turn: first_run})
                        turn_loss += batch_loss
                        turn_acc += batch_acc
                        first_run = False
                        n_turns += 1

                    total_loss += turn_loss / n_turns
                    total_acc += turn_acc / n_turns
                    n_valid_batches += 1

                logging.info('%s cost: %f', name, total_loss/n_valid_batches)
                logging.info('%s accuracy: %f', name, total_acc/n_valid_batches)

                return total_loss / n_valid_batches

            stopper_reward = monitor_stream(valid_set, 'Valid')
            monitor_stream(test_set, 'Test')
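            # A non-improving dev loss raises below; the exception falls through
            # to the finally block, which still saves the final model state.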
            if not stopper.save_and_check(stopper_reward, step, sess):
                raise RuntimeError('Training not improving on dev set')
    finally:
        logging.info('Training stopped after %7d steps and %7.2f epochs. See logs for %s', step, step / len(train_set), c.log_name)
        logging.info('Saving current state. Please wait!\nBest model has reward %7.2f from step %7d: %s' % stopper.highest_reward())
        stopper.saver.save(sess=sess, save_path='%s-FINAL-%.4f-step-%07d' % (stopper.saver_prefix, stopper_reward, step))
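The `stopper` object used above is defined elsewhere in the original repository. Below is a minimal sketch of a compatible early-stopping helper; the class name, the `patience` parameter, and the internal bookkeeping are assumptions, and only the `save_and_check` / `highest_reward` / `saver` / `saver_prefix` interface is taken from the code above. Note that the caller passes a dev-set *loss* as the reward, so a real implementation would negate it (or flip the comparison) if it treats higher as better.

import tensorflow as tf


class EarlyStopper(object):
    """Hypothetical early-stopping helper matching the interface used above."""

    def __init__(self, saver_prefix, patience=5):
        self.saver = tf.train.Saver()
        self.saver_prefix = saver_prefix
        self.patience = patience
        self._best = (float('-inf'), 0, None)  # (reward, step, checkpoint path)
        self._rounds_without_improvement = 0

    def save_and_check(self, reward, step, sess):
        # Save a checkpoint on improvement; return False once `patience`
        # consecutive checks have failed to improve.
        if reward > self._best[0]:
            path = self.saver.save(sess, '%s-%.4f-step-%07d' % (self.saver_prefix, reward, step))
            self._best = (reward, step, path)
            self._rounds_without_improvement = 0
        else:
            self._rounds_without_improvement += 1
        return self._rounds_without_improvement < self.patience

    def highest_reward(self):
        return self._best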
Exemple #32
0
def main():
    # Config -----------------------------------------------------------------------------------------------------
    learning_rate = 0.005
    batch_size = 16
    epochs = 50
    hidden_state_dim = 200
    embedding_dim = 300
    log_dir = 'log'

    # Data ---------------------------------------------------------------------------------------------------
    data_portion = 2 * batch_size
    train_set = Dstc2('../data/dstc2/data.dstc2.train.json', sample_unk=0.01, first_n=data_portion)
    valid_set = Dstc2('../data/dstc2/data.dstc2.dev.json', first_n=data_portion, sample_unk=0, max_dial_len=train_set.max_dial_len, words_vocab=train_set.words_vocab, labels_vocab=train_set.labels_vocab)
    test_set = Dstc2('../data/dstc2/data.dstc2.test.json', first_n=data_portion, sample_unk=0, max_dial_len=train_set.max_dial_len, words_vocab=train_set.words_vocab, labels_vocab=train_set.labels_vocab)

    vocab_size = len(train_set.words_vocab)
    output_dim = max(np.unique(train_set.labels)) + 1
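    # Label ids are assumed to be contiguous integers, so the largest id + 1
    # gives the size of the output layer.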
    n_train_batches = len(train_set.dialogs) // batch_size

    # Model -----------------------------------------------------------------------------------------------------
    logging.info('Creating model')
    input_bt = tf.placeholder('int32', [batch_size, train_set.max_turn_len], name='input')
    turn_lens_b = tf.placeholder('int32', [batch_size], name='turn_lens')
    mask_b = tf.placeholder('int32', [batch_size], name='dial_mask')
    # mask_bT = lengths2mask2d(dial_lens_b, train_set.max_dial_len)
    labels_b = tf.placeholder('int64', [batch_size], name='labels')
    onehot_labels_bo = tf.one_hot(indices=labels_b,
                                  depth=output_dim,
                                  on_value=1.0,
                                  off_value=0.0,
                                  axis=-1)
    is_first_turn = tf.placeholder(tf.bool)
    gru = GRUCell(hidden_state_dim)
    mlp_hidden_layer_dim = 50
    mlp_input2hidden_W = tf.get_variable('in2hid', initializer=tf.random_normal([hidden_state_dim, mlp_hidden_layer_dim]))
    mlp_input2hidden_B = tf.Variable(tf.random_normal([mlp_hidden_layer_dim]))
    mlp_hidden2output_W = tf.get_variable('hid2out', initializer=tf.random_normal([mlp_hidden_layer_dim, output_dim]))
    mlp_hidden2output_B = tf.Variable(tf.random_normal([output_dim]))
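    # NOTE: the MLP parameters above are defined but unused in this variant; the
    # commented-out two-layer readout further below would consume them, while the
    # active path projects the GRU state directly onto the labels.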

    embeddings_we = tf.get_variable('word_embeddings', initializer=tf.random_uniform([vocab_size, embedding_dim], -1.0, 1.0))
    embedded_input_bte = tf.nn.embedding_lookup(embeddings_we, input_bt)
    dialog_state_before_turn = tf.get_variable('dialog_state_before_turn', initializer=tf.zeros([batch_size, hidden_state_dim], dtype='float32'), trainable=False)

    before_state_bh = cond(is_first_turn,
                           lambda: gru.zero_state(batch_size, dtype='float32'),
                           lambda: dialog_state_before_turn)
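    # On the first turn of a dialog the GRU starts from a fresh zero state; on
    # later turns it resumes from the state cached after the previous turn,
    # which is how dialog history is carried across separate sess.run calls.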

    # tf.nn.rnn is the statically unrolled RNN API, so the embedded batch is
    # split into a list of max_turn_len tensors of shape [batch, embedding_dim].
    inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, train_set.max_turn_len, embedded_input_bte)]

    outputs, state_bh = tf.nn.rnn(cell=gru,
            inputs=inputs,
            initial_state=before_state_bh,
            sequence_length=turn_lens_b,
            dtype=tf.float32)

    # state_tbh = scan(fn=lambda last_state_bh, curr_input_bte: gru(curr_input_bte, last_state_bh)[1],
    #                 elems=tf.transpose(embedded_input_bte, perm=[1, 0, 2]),
    #                 initializer=before_state_bh)

    # state_bh = state_tbh[state_tbh.get_shape()[0]-1, :, :]
    # NOTE: assign() only builds an op; it must be fetched in sess.run (or tied
    # in via a control dependency) for the new state to actually be stored.
    keep_state_op = dialog_state_before_turn.assign(state_bh)

    projection_ho = tf.get_variable('project2labels',
                                    initializer=tf.random_uniform([hidden_state_dim, output_dim], -1.0, 1.0))


    logits_bo = tf.matmul(state_bh, projection_ho)
    # hidden = tf.add(tf.matmul(state_bh, mlp_input2hidden_W), mlp_input2hidden_B)
    # logits_bo = tf.add(tf.matmul(hidden, mlp_hidden2output_W), mlp_hidden2output_B)
    tf.histogram_summary('logits', logits_bo)

    probabilities_bo = tf.nn.softmax(logits_bo)
    tf.histogram_summary('probabilities', probabilities_bo)

    float_mask_b = tf.cast(mask_b, 'float32')
    # loss = tf.matmul(tf.expand_dims(tf.cast(mask_b, 'float32'), 0), tf.nn.softmax_cross_entropy_with_logits(logits_bo, onehot_labels_bo)) / tf.reduce_sum(mask_b)
    loss = tf.reduce_sum(tf.mul(float_mask_b, tf.nn.softmax_cross_entropy_with_logits(logits_bo, onehot_labels_bo))) / tf.reduce_sum(float_mask_b)
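    # Masked mean cross-entropy: dialogs that have already ended carry a mask
    # of 0, so only still-active dialogs in the batch contribute to the loss.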


    tf.scalar_summary('CCE loss', loss)

    predict_b = tf.argmax(logits_bo, 1)
    correct = tf.cast(tf.equal(predict_b, labels_b), 'float32')
    accuracy = tf.reduce_sum(tf.mul(correct, float_mask_b)) / tf.reduce_sum(float_mask_b)
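    # Accuracy is masked the same way: predictions are scored only for dialogs
    # whose mask is 1 at the current turn.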

    tf.scalar_summary('Accuracy', accuracy)
    tb_info = tf.merge_all_summaries()

    # Optimizer  -----------------------------------------------------------------------------------------------------
    logging.info('Creating optimizer')
    optimizer = tf.train.AdamOptimizer(learning_rate)
    logging.info('Creating train_op')
    train_op = optimizer.minimize(loss)
    # Session  -----------------------------------------------------------------------------------------------------
    logging.info('Creating session')
    sess = tf.Session()
    logging.info('Initing variables')
    init = tf.initialize_all_variables()
    logging.info('Running session')
    sess.run(init)

    # TB ---------------------------------------------------------------------------------------------------------
    logging.info('See stats via tensorboard: $ tensorboard --logdir %s', log_dir)
    train_writer = tf.train.SummaryWriter(log_dir, sess.graph)

    # Train ---------------------------------------------------------------------------------------------------------
    train_summary = None
    for e in range(epochs):
        logging.info('------------------------------')
        logging.info('Epoch %d', e)

        total_loss = 0
        total_acc = 0
        batch_count = 0
        for bid, (dialogs_bTt, lengths_bT, labels_bT, masks_bT) in enumerate(next_batch(train_set, batch_size)):
            turn_loss = 0
            turn_acc = 0
            n_turns = 0
            first_run = True
            for (turn_bt, label_b, lengths_b, masks_b) in zip(dialogs_bTt.transpose([1, 0, 2]),
                                                              labels_bT.transpose([1, 0]),
                                                              lengths_bT.transpose([1, 0]),
                                                              masks_bT.transpose([1, 0])):
                if sum(masks_b) == 0:
                    break
                _, batch_loss, batch_accuracy, train_summary = sess.run([train_op, loss, accuracy, tb_info],
                                                                        feed_dict={input_bt: turn_bt,
                                                                                   turn_lens_b: lengths_b,
                                                                                   mask_b: masks_b,
                                                                                   labels_b: label_b,
                                                                                   is_first_turn: first_run})
                first_run = False
                turn_loss += batch_loss
                turn_acc += batch_accuracy
                n_turns += 1
            total_loss += turn_loss / n_turns
            total_acc += turn_acc / n_turns
            batch_count += 1
            logging.info('Batch %d/%d', bid, n_train_batches)

        train_writer.add_summary(train_summary, e)
        logging.info('Average train cost %f', total_loss / batch_count)
        logging.info('Average train accuracy: %f', total_acc / batch_count)

        def monitor_stream(work_set, name):
            total_loss = 0
            total_acc = 0
            n_valid_batches = 0
            for bid, (dialogs_bTt, lengths_bT, labels_bT, masks_bT) in enumerate(next_batch(work_set, batch_size)):
                turn_loss = 0
                turn_acc = 0
                n_turns = 0
                first_run = True
                for (turn_bt, label_b, lengths_b, masks_b) in zip(dialogs_bTt.transpose([1, 0, 2]),
                                                                  labels_bT.transpose([1, 0]),
                                                                  lengths_bT.transpose([1, 0]),
                                                                  masks_bT.transpose([1, 0])):
                    if sum(masks_b) == 0:
                        break
                    # Pad the turn to the training set's maximum turn length so it
                    # fits the fixed-size input placeholder.
                    padded_bt = np.pad(turn_bt, ((0, 0), (0, train_set.max_turn_len - turn_bt.shape[1])),
                                       'constant', constant_values=0) \
                        if train_set.max_turn_len > turn_bt.shape[1] else turn_bt
                    predictions, batch_loss, batch_acc, valid_summary = sess.run([predict_b, loss, accuracy, tb_info],
                                                                                 feed_dict={input_bt: padded_bt,
                                                                                            turn_lens_b: lengths_b,
                                                                                            labels_b: label_b,
                                                                                            mask_b: masks_b,
                                                                                            is_first_turn: first_run})
                    turn_loss += batch_loss
                    turn_acc += batch_acc
                    first_run = False
                    n_turns += 1
                total_loss += turn_loss / n_turns
                total_acc += turn_acc / n_turns
                n_valid_batches += 1

            logging.info('%s cost: %f', name, total_loss/n_valid_batches)
            logging.info('%s accuracy: %f', name, total_acc/n_valid_batches)

        monitor_stream(valid_set, 'Valid')
        monitor_stream(test_set, 'Test')
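`next_batch` is also provided elsewhere in the original repository. Below is a minimal sketch of a generator compatible with the call in Exemple #32, assuming the dataset exposes numpy arrays `dialogs` (shape `[n_dialogs, max_dial_len, max_turn_len]`) and `labels` as used above, plus per-turn lengths `word_lens` and per-dialog lengths `dial_lens`; the latter two attribute names are assumptions, not the original API.

import numpy as np


def next_batch(ds, batch_size):
    # Yield fixed-size batches of whole dialogs; the final incomplete batch is
    # dropped, matching n_train_batches = len(train_set.dialogs) // batch_size.
    for i in range(len(ds.dialogs) // batch_size):
        sl = slice(i * batch_size, (i + 1) * batch_size)
        dialogs_bTt = ds.dialogs[sl]
        lengths_bT = ds.word_lens[sl]  # assumed attribute: tokens per turn
        labels_bT = ds.labels[sl]
        # Mask is 1 while a dialog is still running at a given turn, 0 for padding turns.
        turn_idx_T = np.arange(dialogs_bTt.shape[1])
        masks_bT = (turn_idx_T[None, :] < ds.dial_lens[sl][:, None]).astype('int32')  # assumed attribute
        yield dialogs_bTt, lengths_bT, labels_bT, masks_bT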