예제 #1
0
 def state_size(self):
     """Return the size(s) of the recurrent state kept by this cell.

     When ``self._state_is_tuple`` is truthy the result is an
     ``LSTMStateTuple`` whose two entries are both five times
     ``self._num_units``; otherwise it is the single value
     ``2 * self._num_units``.
     """
     # Computed up front, before the branch, exactly like the tuple sizes
     # are needed in the tuple case.
     widened = self._num_units * 5
     if self._state_is_tuple:
         return LSTMStateTuple(widened, 5 * self._num_units)
     # NOTE(review): the flat size (2 * num_units) does not equal the sum of
     # the two tuple entries (10 * num_units) -- confirm this is intended.
     return 2 * self._num_units
예제 #2
0
 def __init__(self, is_train, params):
     """Build the multi-scope stacked-LSTM regression graph.

     One stacked LSTM ("scope") is created per entry of ``n_seqs[0]``; the
     sequence length fed to scope ``i`` is ``n_seqs[1][i]``. The pooled
     output of each scope is used as the initial ``h`` state of the first
     layer of the next scope and is also blended (0.2 / 0.8) into that
     scope's own pooled output. The last scope's output goes through a
     linear layer and is scored with a root-mean-square error.

     Args:
         is_train: when truthy, gradient-clipped Adam training ops are
             added to the graph.
         params: mapping that must provide 'batch_size', 'n_seqs',
             'lstm_size', 'num_layers', 'keep_prob', 'learning_rate',
             'input_size' and 'output_size'.
     """
     self.is_train = is_train
     self.batch_size = params['batch_size']
     self.n_seqs = params['n_seqs']
     self.lstm_size = params['lstm_size']
     self.num_layers = params['num_layers']
     self.keep_prob = params['keep_prob']
     self.learning_rate = params['learning_rate']
     self.input_size = params['input_size']
     self.output_size = params['output_size']
     self.num_scopes = len(self.n_seqs[0])
     # Global-norm threshold used when clipping gradients below.
     self.grad_clip = 2
     self.targets = tf.placeholder(tf.float32,
                                   shape=[None, self.output_size])
     self.inputs = []
     self.initial_states = []
     self.states = []
     self.in_outputs = []
     self.scope_scales = []
     for i in range(self.num_scopes):
         with tf.variable_scope("lstm_scope" + str(i)):
             self.inputs.append(
                 tf.placeholder(
                     tf.float32,
                     shape=[None, self.n_seqs[1][i], self.input_size]))
             if i > 0:
                 # NOTE(review): these placeholders are created but not
                 # consumed anywhere in this constructor; the blend below
                 # uses the fixed 0.8 / 0.2 weights instead.
                 self.scope_scales.append(
                     tf.placeholder(tf.float32, [1], "scale"))
             cell = tf.nn.rnn_cell.MultiRNNCell(
                 [self.lstm_cell() for _ in range(self.num_layers)])
             print(cell.name)
             # First layer: zero state for the first scope; later scopes
             # start their h state from the previous scope's pooled output.
             if i == 0:
                 initstate = [
                     LSTMStateTuple(
                         tf.zeros([self.batch_size, self.lstm_size],
                                  tf.float32),
                         tf.zeros([self.batch_size, self.lstm_size],
                                  tf.float32))
                 ]
             else:
                 initstate = [
                     LSTMStateTuple(
                         tf.zeros([self.batch_size, self.lstm_size],
                                  tf.float32), self.in_outputs[i - 1])
                 ]
             # Remaining layers always start from zeros.
             for _ in range(self.num_layers - 1):
                 initstate.append(
                     LSTMStateTuple(
                         tf.zeros([self.batch_size, self.lstm_size],
                                  tf.float32),
                         tf.zeros([self.batch_size, self.lstm_size],
                                  tf.float32)))
             self.initial_states.append(tuple(initstate))
             cell_inputs = self.inputs[i]
             cell_outputs, state = tf.nn.dynamic_rnn(
                 cell,
                 cell_inputs,
                 initial_state=self.initial_states[i],
                 dtype=tf.float32)
             self.states.append(state)
             with tf.variable_scope('attention'):
                 Wc = tf.get_variable('weighted_c',
                                      shape=(self.lstm_size, 1))
                 Wh = tf.get_variable('weighted_h',
                                      shape=(self.lstm_size, 1))
                 # Score contribution of the top layer's final c state.
                 wcc = tf.matmul(state[self.num_layers - 1].c, Wc)
                 # The per-example index is deliberately NOT called ``i``:
                 # on Python 2 a list-comprehension variable leaks into the
                 # enclosing scope and would overwrite the scope index that
                 # is still needed after this block.
                 ms = [
                     tf.nn.tanh(wcc[b] +
                                tf.matmul(cell_outputs[b, :, :], Wh))
                     for b in range(self.batch_size)
                 ]
                 # NOTE(review): softmax normalises over the last axis by
                 # default, and Wh has a single output column, so that
                 # axis appears to have size 1 here -- confirm whether the
                 # time axis (axis=1) was intended.
                 ws = tf.nn.softmax(ms)
                 re_outs = tf.reduce_sum(tf.multiply(ws, cell_outputs),
                                         axis=1)
             if i > 0:
                 # Fixed-weight blend with the previous scope's output.
                 re_outs = tf.add(0.8 * re_outs,
                                  0.2 * self.in_outputs[i - 1])
             self.in_outputs.append(re_outs)
     w_o = tf.get_variable('weight', [self.lstm_size, self.output_size])
     b_o = tf.get_variable('bias', [self.output_size])
     print(w_o.name)
     self.outputs = tf.matmul(self.in_outputs[-1], w_o) + b_o
     # Root-mean-square error between targets and predictions.
     self.loss = tf.sqrt(
         tf.reduce_mean(tf.square(self.targets - self.outputs)))
     if is_train:
         # Clip by global norm before handing the gradients to Adam.
         self.tvars = tf.trainable_variables()
         self.grads, _ = tf.clip_by_global_norm(
             tf.gradients(self.loss, self.tvars), self.grad_clip)
         self.optimizer = tf.train.AdamOptimizer(
             self.learning_rate).apply_gradients(zip(
                 self.grads, self.tvars))
예제 #3
0
    def make_model(self):
        """Build the TensorFlow graph for the trace-based program classifier.

        Creates the input placeholders (stored in ``self.placeholders``), a
        shared embedding matrix, two LSTM encoders (one over variable values
        of each program state, one over the tokens of each statement), and a
        trace-level LSTM that is stepped manually inside ``tf.while_loop``
        with an MLP-scored attention over the per-execution states plus the
        statement embedding. The pooled result feeds a linear classification
        head. Output tensors are stored in ``self.ops`` under the keys
        'l_res', 'l_mono', 'l_mono_mask', 'l_mono_out', 'l_mono_attention',
        'attention', 'final_embeddings', 'predictions', 'comparisons',
        'accuracy' and 'loss'.

        Reads ``self.vocabulary`` and the ``self.params`` entries
        'number_of_programs', 'number_of_paths', 'number_of_executions',
        'vocabulary_embedding_size', 'hidden_size',
        'rnn_state_dropout_keep_prob' and 'num_labels'.
        """
        # number of programs * number of paths * number of executions * number of states/number of statements, number of variables
        self.placeholders["executions"] = tf.placeholder(tf.int32, [None, None], name="executions")
        # number of programs * number of paths * number of executions * number of states/number of statements
        self.placeholders["variable_number_sequence"] = tf.placeholder(tf.int32, [None], name="variable_number_sequence")

        # number of programs * number of paths * number of statements/number of states, number of tokens
        self.placeholders["tokens"] = tf.placeholder(tf.int32, [None, None], name="tokens")
        # number of programs * number of paths * number of statements/number of states
        self.placeholders["tokens_number_sequence"] = tf.placeholder(tf.int32, [None], name="tokens_number_sequence")

        # number of programs * number of paths
        self.placeholders["state_statement_number_sequence"] = tf.placeholder(tf.int32, [None], name="state_statement_number_sequence")

        # NOTE(review): this placeholder is created, but the DropoutWrapper
        # calls below read self.params["rnn_state_dropout_keep_prob"] instead
        # -- confirm which one is meant to control dropout.
        self.placeholders['rnn_state_dropout_keep_prob'] = tf.placeholder(tf.float32, None,
                                                                          name='rnn_state_dropout_keep_prob')

        self.placeholders['mlp_dropout_keep_prob'] = tf.placeholder(tf.float32, None, name='mlp_dropout_keep_prob')

        # Upper bound on the number of while_loop iterations (see while_condition).
        self.placeholders["max_state_statement"] = tf.placeholder(tf.int32, None, name="max_state")

        self.placeholders['label'] = tf.placeholder(tf.int32, [None], name='label')


        # Used below as a static count for tf.split / tf.unstack and in shapes.
        batch_size = self.params["number_of_programs"] * self.params["number_of_paths"]


        # One extra row beyond the vocabulary size -- presumably for a
        # padding/unknown id; verify against the data pipeline.
        embedding_matrix = tf.get_variable('embedding_matrix', [len(self.vocabulary)+1, self.params["vocabulary_embedding_size"]])
        # number of programs * number of paths * number of executions * number of states/number of statements,  number of variables , embedding_size
        embedded_executions = tf.nn.embedding_lookup(params=embedding_matrix, ids=self.placeholders["executions"])
        # number of programs * number of paths * number of states/number of statements,  number of tokens , embedding_size
        embedded_tokens = tf.nn.embedding_lookup(params=embedding_matrix, ids=self.placeholders["tokens"])


        with tf.variable_scope("state_encoding"):
            state_encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.params["hidden_size"])
            state_encoder_cell = tf.nn.rnn_cell.DropoutWrapper(state_encoder_cell,
                                                               state_keep_prob=self.params["rnn_state_dropout_keep_prob"])
            _, states_embedding = tf.nn.dynamic_rnn(state_encoder_cell, embedded_executions,
                                                    sequence_length=self.placeholders["variable_number_sequence"],
                                                    initial_state=state_encoder_cell.zero_state(tf.shape(embedded_executions)[0],
                                                                                                tf.float32),
                                                    dtype=tf.float32)
        # Element 1 of the final state is regrouped so that axis 0 indexes
        # (program, path, execution) and axis 1 indexes the state position.
        # number of programs * number of paths * number of executions, number of states/number of statements, embedding_size
        dynamic_state_embedding = tf.convert_to_tensor(tf.split(states_embedding[1], batch_size * self.params["number_of_executions"], axis=0))


        with tf.variable_scope("statement_encoding"):
            statement_encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.params["hidden_size"])
            statement_encoder_cell = tf.nn.rnn_cell.DropoutWrapper(statement_encoder_cell,
                                                                   state_keep_prob=self.params["rnn_state_dropout_keep_prob"])
            _, statements_embedding = tf.nn.dynamic_rnn(statement_encoder_cell, embedded_tokens,
                                                        sequence_length=self.placeholders["tokens_number_sequence"],
                                                        initial_state=statement_encoder_cell.zero_state(tf.shape(embedded_tokens)[0],
                                                                                                        tf.float32),
                                                        dtype=tf.float32)
        # number of programs * number of paths, number of states/number of statements, embedding_size
        static_tokens_embedding = tf.convert_to_tensor(tf.split(statements_embedding[1], batch_size, axis=0))


        # Trace-level cell; it is stepped manually inside the while_loop below.
        trace_encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.params["hidden_size"])
        trace_encoder_cell = tf.nn.rnn_cell.DropoutWrapper(trace_encoder_cell,
                                                           state_keep_prob=self.params["rnn_state_dropout_keep_prob"])
        trace_encoder_initial_states = trace_encoder_cell.zero_state(batch_size, tf.float32)

        # axis zero stands for two LSTM states: c and h
        trace_encoder_final_states = tf.zeros([2, batch_size, self.params["hidden_size"]])

        loop_counter_inital = tf.constant(0)

        # "monitor_*" tensors start empty along axis 0 and grow by
        # concatenation on every loop iteration (hence the None leading
        # dimension in the shape invariants further down).
        monitor_rnn_states = tf.zeros([0, batch_size, self.params["hidden_size"]])
        monitor_output = tf.zeros([0, batch_size, self.params["hidden_size"]])
        monitor_mask = tf.zeros([0, batch_size], tf.float32)
        monitor_attention_probabilities = tf.zeros([0, self.params["number_of_executions"]+1, 1], tf.float32)


        def while_condition(loop_counter,state_statement_number_sequence, rnn_states, trace_encoder_final_states,
                            monitor_rnn_states, monitor_mask, monitor_output, monitor_attention_probabilities):
            # Only the counter matters; the other loop variables are unused here.
            return loop_counter < self.placeholders["max_state_statement"]

        def while_body(loop_counter, state_statement_number_sequence, rnn_states, trace_encoder_final_states,
                       monitor_rnn_states, monitor_mask, monitor_output, monitor_attention_probabilities):
            # One step of the trace encoder: gather this step's state and
            # statement embeddings, attend over them, advance the LSTM, and
            # record the state for paths whose length ends at this step.
            loop_counter_current = loop_counter

            # number of programs * number of paths * number of executions, embedding_size
            current_states = tf.gather_nd(dynamic_state_embedding,
                                          tf.stack([tf.range(0, batch_size * self.params["number_of_executions"]),
                                                    tf.zeros([batch_size * self.params["number_of_executions"]], tf.int32)+loop_counter_current], axis=1))
            # number of programs * number of paths, number of executions, embedding_size
            current_states = tf.convert_to_tensor(tf.split(current_states, batch_size, axis=0))

            # number of programs * number of paths, embedding_size
            current_tokens = tf.gather_nd(static_tokens_embedding,
                                          tf.stack([tf.range(0, batch_size), tf.zeros([batch_size], tf.int32)+loop_counter_current], axis=1))
            # number of programs * number of paths, 1, embedding_size
            current_tokens = tf.expand_dims(current_tokens, axis=1)

            # number of programs * number of paths, number of executions+1, embedding_size
            current_states_and_tokens = tf.concat([current_states,current_tokens], axis=1)

            # number of programs * number of paths, 2 * lstm hidden_size
            rnn_states_concat = tf.concat((rnn_states[0], rnn_states[1]), axis=1)
            # number of programs * number of paths, 1, 2 * lstm hidden_size
            rnn_states_concat = tf.expand_dims(rnn_states_concat, axis=1)
            # 1 * number of executions+1 * 1
            replicate_factor = tf.ones([1,self.params["number_of_executions"]+1,1], tf.float32)
            # Broadcasting against the ones tensor repeats the state along axis 1.
            # number of programs * number of paths, number of executions+1, 2 * lstm hidden_size
            rnn_states_concat = rnn_states_concat * replicate_factor

            # number of programs * number of paths, number of executions+1, embedding_size + 2 * lstm hidden_size
            rnn_inputs_and_states = tf.concat([current_states_and_tokens,rnn_states_concat], axis=-1)
            # number of programs * number of paths * number of executions+1, embedding_size + 2 * lstm hidden_size
            rnn_inputs_and_states = tf.concat(tf.unstack(rnn_inputs_and_states, num=batch_size, axis=0), 0)

            # MLP is defined outside this method; it is called here to get a
            # callable that yields the scores -- presumably 0 hidden layers
            # and 1 output unit; verify against its definition.
            # number of programs * number of paths * number of executions+1, 1
            attention_scores_fn = MLP(rnn_inputs_and_states, 0, 1, self.placeholders['mlp_dropout_keep_prob'])
            # number of programs * number of paths, number of executions+1, 1
            attention_scores = tf.convert_to_tensor(tf.split(attention_scores_fn(), batch_size, axis=0))
            # Normalise across the executions+1 candidates (axis 1).
            attention_probabilities = tf.nn.softmax(attention_scores, dim=1)

            monitor_attention_probabilities = tf.concat([monitor_attention_probabilities, attention_probabilities], axis=0)

            # Attention-weighted sum of the candidate embeddings.
            # number of programs * number of paths, embedding_size
            inputs_after_attention = tf.reduce_sum(attention_probabilities * current_states_and_tokens, axis=1)

            _, rnn_states = trace_encoder_cell(inputs_after_attention, rnn_states)

            monitor_rnn_states = tf.concat([monitor_rnn_states, rnn_states], axis=0)

            monitor_output = tf.concat([monitor_output, tf.expand_dims(rnn_states[1], axis=0)], axis=0)

            loop_counter_current += 1

            mask = tf.zeros([0], tf.float32)

            it_state_length = tf.unstack(state_statement_number_sequence, batch_size, axis=0)

            # Build a 0/1 vector with a 1 for every path whose length equals
            # the (already incremented) counter, i.e. whose last step was
            # the one just processed.
            for each_state_length in it_state_length:
                def f1():
                    return tf.zeros([1], tf.float32)
                def f2():
                    return tf.ones([1], tf.float32)

                result = tf.cond(tf.equal(each_state_length,loop_counter_current), f2, f1)
                mask = tf.concat([mask, result], axis=0)


            monitor_mask = tf.concat([monitor_mask, tf.expand_dims(mask,0)], axis=0)

            mask = tf.expand_dims(mask, axis=1)

            # Accumulate the state only for paths that finished at this step.
            trace_encoder_final_states = trace_encoder_final_states + mask * rnn_states

            return [loop_counter_current, state_statement_number_sequence, rnn_states, trace_encoder_final_states,
                    monitor_rnn_states, monitor_mask, monitor_output, monitor_attention_probabilities]


        _, _, _, self.ops['l_res'], self.ops['l_mono'], self.ops['l_mono_mask'], self.ops['l_mono_out'], self.ops['l_mono_attention'] = \
        tf.while_loop(while_condition,
                      while_body,
                      loop_vars=[loop_counter_inital, self.placeholders['state_statement_number_sequence'],
                                 trace_encoder_initial_states, trace_encoder_final_states, monitor_rnn_states,
                                 monitor_mask, monitor_output, monitor_attention_probabilities],
                      shape_invariants=[loop_counter_inital.shape,
                                        self.placeholders["state_statement_number_sequence"].shape,
                                        LSTMStateTuple(tf.TensorShape([batch_size,self.params["hidden_size"]]),
                                                       tf.TensorShape([batch_size,self.params["hidden_size"]])),
                                        trace_encoder_final_states.shape,
                                        tf.TensorShape([None, batch_size, self.params["hidden_size"]]),
                                        tf.TensorShape([None, batch_size]),
                                        tf.TensorShape([None, batch_size, self.params["hidden_size"]]),
                                        tf.TensorShape([None, self.params["number_of_executions"]+1, 1])])

        # Mean of the recorded attention weights over axis 0, with the
        # trailing singleton axis removed.
        self.ops['attention'] = tf.squeeze(tf.reduce_mean(self.ops['l_mono_attention'], axis=0), axis=-1)

        # NOTE(review): per the "c and h" comment on trace_encoder_final_states,
        # index 0 looks like the c component even though the variable is named
        # h_states (monitor_output above uses rnn_states[1] for the output)
        # -- confirm which component is intended.
        h_states = self.ops['l_res'][0]
#         c_states = self.ops['l_res'][1]
#         hc_conca_states = tf.concat([h_states,c_states], axis=1)

        state_rep = h_states
        # Split per program, then max-pool across that program's paths.
        self.ops['final_embeddings'] = tf.reduce_max(tf.stack(tf.split(state_rep,
                                                                       self.params["number_of_programs"], axis=0), axis=0), axis=1)

        W_pred = tf.get_variable("weights_for_prediction", [self.params["hidden_size"], self.params["num_labels"]], tf.float32)
        b_pred = tf.get_variable("bias_for_prediction", [self.params["num_labels"]], tf.float32)
        logits = tf.matmul(self.ops['final_embeddings'], W_pred) + b_pred

        predictions = tf.argmax(logits, 1)
        comparisons = tf.cast(tf.equal(tf.cast(predictions,tf.int32), self.placeholders['label']),tf.float32)
        accuracy = tf.reduce_mean(comparisons)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.placeholders['label'], logits=logits)

        self.ops["predictions"] = predictions
        self.ops["comparisons"] = comparisons
        self.ops["accuracy"] = accuracy
        # The per-example losses are summed, not averaged.
        self.ops["loss"] = tf.reduce_sum(loss)
def Model(_abnormal_data, _abnormal_label, _hidden_num, _elem_num, _file_name,
          _partition):
    """Build the recurrent auto-encoder ensemble graph.

    Every ensemble member encodes the same unrolled input sequence with its
    own cell; the final encoder states of all members are concatenated (and
    optionally compressed by a shared dense layer) into one state that
    initialises every member's decoder. The loss is the mean square of the
    summed per-member reconstruction differences, minimised with Adam.

    Besides its arguments, the function reads these names defined outside
    of it: ``batch_num``, ``ensemble_space``, ``cell_type``, ``compress``,
    ``decode_without_input``, ``reverse`` and ``learning_rate``.

    Args:
        _abnormal_data: array-like; only ``shape[1]`` (number of unrolled
            steps) and ``shape[2]`` (features per step) are read.
        _abnormal_label: not used by this function.
        _hidden_num: number of units of each recurrent cell.
        _elem_num: output width of the decoder projection.
        _file_name: forwarded to ``RSLSTMCell``.
        _partition: forwarded to ``RSLSTMCell``.

    Returns:
        Tuple ``(g, p_input, d_dec, loss, optimizer, saver)``: the graph, the
        input placeholder, a dict mapping ``'dec_output_<k>'`` to member
        ``k``'s decoder output, the loss tensor, the training op and a
        ``tf.train.Saver``.
    """
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        p_input = tf.placeholder(tf.float32,
                                 shape=(batch_num, _abnormal_data.shape[1],
                                        _abnormal_data.shape[2]))
        # One [batch, features] tensor per step.
        p_inputs = [
            tf.squeeze(t, [1])
            for t in tf.split(p_input, _abnormal_data.shape[1], 1)
        ]

        d_enc = {}
        with tf.variable_scope('encoder'):
            for j in range(ensemble_space):
                # NOTE(review): cell_type 0 only defines enc_cell and
                # cell_type 2 defines no residual cell, so members 0/1 (and
                # the .c/.h accesses below) appear to require cell_type 1 --
                # confirm.
                if cell_type == 0:
                    enc_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                if cell_type == 1:
                    pure_enc_cell = LSTMCell(_hidden_num)
                    residual_enc_cell = RLSTMCell(_hidden_num,
                                                  reuse=tf.AUTO_REUSE)
                    enc_cell = RSLSTMCell(_hidden_num,
                                          file_name=_file_name,
                                          component=j,
                                          partition=_partition,
                                          type='enc',
                                          reuse=tf.AUTO_REUSE)
                if cell_type == 2:
                    pure_enc_cell = GRUCell(_hidden_num)
                    enc_cell = RSGRUCell(_hidden_num)

                # Member 0 uses the plain cell, member 1 the residual cell,
                # every further member the RS* cell.
                if j == 0:
                    member_enc_cell = pure_enc_cell
                elif j == 1:
                    member_enc_cell = residual_enc_cell
                else:
                    member_enc_cell = enc_cell

                enc_state = member_enc_cell.zero_state(batch_size=batch_num,
                                                       dtype=tf.float32)
                enc_outputs = []
                for enc_input in p_inputs:
                    enc_output_, enc_state = member_enc_cell(
                        enc_input, enc_state)
                    enc_outputs.append(enc_output_)

                d_enc['enc_output_{0}'.format(j)] = enc_outputs
                d_enc['enc_state_{0}'.format(j)] = enc_state

            # Concatenate the members' final states along the feature axis.
            shared_state_c = tf.concat([
                d_enc['enc_state_{0}'.format(j)].c
                for j in range(ensemble_space)
            ],
                                       axis=1)
            shared_state_h = tf.concat([
                d_enc['enc_state_{0}'.format(j)].h
                for j in range(ensemble_space)
            ],
                                       axis=1)

            if compress:
                # The same dense layer (shared weights) maps both c and h
                # down to _hidden_num units.
                compress_state = tf.layers.Dense(units=_hidden_num,
                                                 activation=tf.tanh,
                                                 use_bias=True)
                shared_state_c = compress_state(shared_state_c)
                shared_state_h = compress_state(shared_state_h)

            shared_state = LSTMStateTuple(shared_state_c, shared_state_h)

        d_dec = {}
        with tf.variable_scope('decoder') as vs:
            dec_weight_ = tf.Variable(tf.truncated_normal(
                [_hidden_num * ensemble_space, _elem_num], dtype=tf.float32),
                                      name="dec_weight")
            dec_bias_ = tf.Variable(tf.constant(0.1,
                                                shape=[_elem_num],
                                                dtype=tf.float32),
                                    name="dec_bias")
            if decode_without_input:
                # Tile the projection once, under its own name. Re-binding
                # dec_weight_ inside the member loop made the second member
                # expand and tile an already-tiled 3-D tensor.
                tiled_dec_weight = tf.tile(tf.expand_dims(dec_weight_, 0),
                                           [batch_num, 1, 1])
                for k in range(ensemble_space):
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num)
                            dec_cell = RSLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space)
                            dec_cell = RSLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    if k == 0:
                        member_dec_cell = pure_dec_cell
                    elif k == 1:
                        member_dec_cell = residual_dec_cell
                    else:
                        member_dec_cell = dec_cell

                    # The decoder is fed all-zero inputs at every step.
                    dec_inputs = [
                        tf.zeros(tf.shape(p_inputs[0]), dtype=tf.float32)
                        for _ in range(len(p_inputs))
                    ]
                    dec_outputs, dec_state = tf.contrib.rnn.static_rnn(
                        member_dec_cell,
                        dec_inputs,
                        initial_state=shared_state,
                        dtype=tf.float32)

                    if reverse:
                        dec_outputs = dec_outputs[::-1]

                    # [steps, batch, units] -> [batch, steps, units]
                    dec_output_ = tf.transpose(tf.stack(dec_outputs),
                                               [1, 0, 2])
                    d_dec['dec_output_{0}'.format(k)] = tf.matmul(
                        dec_output_, tiled_dec_weight) + dec_bias_

                    if reverse:
                        # NOTE(review): this slices the leading axis of a
                        # tensor that was just transposed to batch-major, and
                        # the step order was already reversed above --
                        # confirm the second reversal is intended.
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

            else:
                for k in range(ensemble_space):
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num,
                                                          reuse=tf.AUTO_REUSE)
                            dec_cell = RSLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space,
                                                          reuse=tf.AUTO_REUSE)
                            dec_cell = RSLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  component=k,
                                                  partition=_partition,
                                                  type='dec',
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    if k == 0:
                        member_dec_cell = pure_dec_cell
                    elif k == 1:
                        member_dec_cell = residual_dec_cell
                    else:
                        member_dec_cell = dec_cell

                    # Start from zeros, then feed each projected output back
                    # in as the next step's input.
                    dec_state = shared_state
                    dec_input_ = tf.zeros(tf.shape(p_inputs[0]),
                                          dtype=tf.float32)
                    dec_outputs = []
                    for step in range(len(p_inputs)):
                        if step > 0:
                            vs.reuse_variables()
                        dec_input_, dec_state = member_dec_cell(
                            dec_input_, dec_state)
                        dec_input_ = tf.matmul(dec_input_,
                                               dec_weight_) + dec_bias_
                        dec_outputs.append(dec_input_)

                    d_dec['dec_output_{0}'.format(k)] = dec_outputs

                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

        # NOTE(review): [0] picks the first entry along the leading axis of
        # each member's output (first list element, or first row of the
        # tensor in the decode_without_input case) before subtracting the
        # full input -- confirm this indexing is intended.
        sum_of_difference = 0
        for i in range(ensemble_space):
            sum_of_difference += d_dec['dec_output_{0}'.format(i)][0] - p_input

        loss = tf.reduce_mean(tf.square(sum_of_difference))
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
    return g, p_input, d_dec, loss, optimizer, saver
예제 #5
0
    def call(self, inputs, state):
        """Run one step of the (optionally modified) LSTM cell.

        The cell/output update rule is chosen by `self._use_peepholes` and
        `self._gate_mod` ("lstm", "rkm_lstm", "rkm_cifg", "gated_linear",
        "linear", "gcnn", "cnn").

        Args:
          inputs: input Tensor, must be 2-D, `[batch, input_size]`.
          state: if `state_is_tuple` is False, this must be a state Tensor,
            `2-D, [batch, state_size]`.  If `state_is_tuple` is True, this
            must be a tuple of state Tensors, both `2-D`, with column sizes
            `c_state` and `m_state`.

        Returns:
          A tuple containing:
          - A `2-D, [batch, output_dim]`, Tensor representing the output of
            the LSTM after reading `inputs` when previous state was `state`.
            Here output_dim is:
               num_proj if num_proj was set,
               num_units otherwise.
          - Tensor(s) representing the new state of LSTM after reading
            `inputs` when the previous state was `state`.  Same type and
            shape(s) as `state`.

        Raises:
          ValueError: If input size cannot be inferred from inputs via
            static shape inference.
          NotImplementedError: If peepholes are disabled and `gate_mod` is
            not one of the supported modes.
        """
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            # Flat state layout is [c | m] along the column axis.
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        # No feedback, if desired; also, gcnn/cnn do not have feedback
        if self._no_feedback or self._gate_mod in ["gcnn", "cnn"]:
            # zeros_like keeps the dtype of the state and also works when the
            # batch dimension is not statically known (tf.zeros(shape) needs a
            # fully defined shape).
            m_prev = array_ops.zeros_like(m_prev)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        if self._ngram:
            # Inputs are added directly to the recurrent term, so they are
            # presumably pre-projected to the gate width -- TODO confirm.
            lstm_matrix = inputs + math_ops.matmul(m_prev, self._kernel)
        else:
            lstm_matrix = math_ops.matmul(
                array_ops.concat([inputs, m_prev], 1), self._kernel)
        lstm_matrix = nn_ops.bias_add(lstm_matrix, self._bias)

        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        # Cell-state update. Peepholes (diagonal connections) take precedence
        # over gate_mod.
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        elif self._gate_mod == "lstm":
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))
        elif self._gate_mod == "rkm_lstm":
            # Same as "lstm" but without the activation on the new input.
            c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * j)
        elif self._gate_mod == "rkm_cifg":
            # Coupled gates: the input gate is 1 - forget gate.
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 (1 - sigmoid(f + self._forget_bias)) * j)
        elif self._gate_mod in ["gated_linear", "linear"]:
            # The gates are replaced by the stored coefficients.
            c = (self._sigma2_f * c_prev + self._sigma2_i * j)
        elif self._gate_mod in ["gcnn", "cnn"]:
            # No dependence on the previous cell state.
            sigma2_i = 1
            c = sigma2_i * j
        else:
            raise NotImplementedError("Invalid gate_mod: {0}".format(
                self._gate_mod))

        if self._layer_norm:
            c = tf.contrib.layers.layer_norm(c)

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        # Output computation, again with peepholes taking precedence.
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        elif self._gate_mod == "lstm":
            m = sigmoid(o) * self._activation(c)
        elif self._gate_mod in [
                "rkm_lstm", "rkm_cifg", "gated_linear", "gcnn"
        ]:
            m = sigmoid(o) * c
        elif self._gate_mod in ["linear", "cnn"]:
            m = self._activation(c)
        else:
            raise NotImplementedError("Invalid gate_mod: {0}".format(
                self._gate_mod))

        if self._num_proj is not None:
            m = math_ops.matmul(m, self._proj_kernel)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
예제 #6
0
 def state_size(self):
     """Return the size of the cell state.

     When the state is kept as a tuple this is an ``LSTMStateTuple`` whose
     two entries both equal ``num_units``; otherwise it is ``num_units``.
     """
     units = self.num_units
     if not self._state_is_tuple:
         return units
     return LSTMStateTuple(units, units)
예제 #7
0
    def build(self):
        """Build the complete TensorFlow 1.x graph for the tagger.

        The default graph is reset, then the following are created in order:
        dropout/learning-rate variables, input placeholders and embeddings
        (word endings and/or grammeme vectors), a ReLU input projection, one
        bidirectional LSTM layer plus ``config.n_rnn_layers - 1`` stacked
        bidirectional layers, a dense + batch-norm block, optional auxiliary
        next/previous-POS heads (``config.use_pos_lm``), the main prediction
        head with loss and accuracy, and, when ``self.is_training`` is truthy,
        the optimizer and ``self.train_op``.

        Attributes set on ``self`` include the placeholders (``weights``,
        ``endings_input``, ``grammems_input``, ``target``, ``next_pos_target``,
        ``pred_pos_target``), the losses and metrics (``main_loss``,
        ``main_loss_avg``, ``correct``, ``accuracy``, ...), ``summaries``,
        ``train_op``, ``variables_to_save`` and ``saver``.

        Raises:
            ValueError: If neither ``use_endings`` nor ``use_gram`` is enabled,
                or if ``config.merge_mode`` / ``config.optimizer`` has an
                unsupported value.
        """
        tf.reset_default_graph()
        config = self.build_config
        embeddings = []

        # Dropout rates are stored as non-trainable graph variables
        # initialised from the config.
        rnn_out_drop = tf.get_variable(name='rnn_out_drop',
                                       trainable=False,
                                       initializer=config.rnn_out_drop)
        endings_inp_drop = tf.get_variable(name='endings_inp_drop',
                                           trainable=False,
                                           initializer=config.endings_inp_drop)
        gram_inp_drop = tf.get_variable(name='gram_inp_drop',
                                        trainable=False,
                                        initializer=config.gram_inp_drop)
        rnn_state_drop = tf.get_variable(name='rnn_state_drop',
                                         trainable=False,
                                         initializer=config.rnn_state_drop)
        dense_drop = tf.get_variable(name='dense_drop',
                                     trainable=False,
                                     initializer=config.dense_drop)
        # NOTE(review): this flag is created here but not consumed anywhere
        # else inside build().
        self.training = tf.get_variable(name='is_training',
                                        trainable=False,
                                        dtype=tf.bool,
                                        initializer=True)
        self.lr = tf.get_variable(name='lr',
                                  initializer=self.train_config.lr,
                                  trainable=False)
        tf.summary.scalar('lr__', self.lr)

        # Per-token weights; their sum is the normaliser for every averaged
        # loss and for the accuracy.
        self.weights = tf.placeholder(dtype=tf.float32,
                                      shape=[None, None],
                                      name='weights')
        weights = tf.reshape(self.weights, shape=[-1])
        total = tf.reduce_sum(weights)
        self.total = total
        self.global_step = tf.train.get_or_create_global_step()

        if config.use_endings:
            with tf.variable_scope('word_endings'):
                voc_size = self.endings_vectorizer.get_size()
                self.endings_input = tf.placeholder(dtype=tf.int32,
                                                    shape=[None, None],
                                                    name='endings_input')
                self.endings_embedding = tf.get_variable(
                    'endings_embs',
                    shape=[voc_size, config.endings_emb_size],
                    initializer=tf.initializers.random_normal)
                endings_input = tf.nn.embedding_lookup(self.endings_embedding,
                                                       self.endings_input)
                endings_input = tf.nn.dropout(endings_input,
                                              keep_prob=1. - endings_inp_drop)
                embeddings.append(endings_input)

        if config.use_gram:
            with tf.variable_scope('grammems'):
                gram_vec_size = self.grammeme_vectorizer_input.grammemes_count(
                )
                self.grammems_input = tf.placeholder(
                    dtype=tf.float32,
                    shape=[None, None, gram_vec_size],
                    name='grammems_input')
                grammems_input = tf.nn.dropout(self.grammems_input,
                                               keep_prob=1. - gram_inp_drop)
                grammems_input = self.dense_layer(
                    in_size=gram_vec_size,
                    out_size=config.gram_hidden_size,
                    name='gram_embs',
                    inputs=grammems_input,
                    activation='relu')
                embeddings.append(grammems_input)

        if not embeddings:
            # Fail with a readable message instead of an IndexError below.
            raise ValueError(
                'No input features: enable use_endings and/or use_gram')
        if len(embeddings) > 1:
            embeddings = tf.concat(embeddings,
                                   axis=-1,
                                   name='concatenated_inputs')
        else:
            embeddings = embeddings[0]
        batch_size = tf.shape(embeddings, name='batch_size')[0]

        with tf.variable_scope('lstm_input'):
            # Linear projection of the concatenated features to the RNN width.
            lstm_input_kernel = tf.get_variable(
                name='lstm_input',
                shape=[
                    embeddings.get_shape().as_list()[-1],
                    config.rnn_hidden_size
                ])
            lstm_input_bias = tf.get_variable(name='lstm_input_bias',
                                              shape=[config.rnn_hidden_size])
            lstm_input = tf.tensordot(embeddings, lstm_input_kernel, axes=(
                (-1), (0))) + lstm_input_bias
            lstm_input = tf.nn.relu(lstm_input)

        with tf.variable_scope('lstm'):

            def learned_init_states(layer_no):
                # Create learned (c, m) start vectors for both directions and
                # tile them over the batch.
                hidden = config.rnn_hidden_size
                fw_vec = tf.get_variable(name=f'f_initial_state_{layer_no}',
                                         shape=[hidden * 2])
                bw_vec = tf.get_variable(f'b_initial_state_{layer_no}',
                                         shape=[hidden * 2])
                # First half of each vector is the cell state, second half the
                # output state.
                f_c, f_m, b_c, b_m = [
                    tf.tile(tf.expand_dims(part, axis=0),
                            multiples=[batch_size, 1])
                    for part in (fw_vec[:hidden], fw_vec[hidden:],
                                 bw_vec[:hidden], bw_vec[hidden:])
                ]
                return LSTMStateTuple(f_c, f_m), LSTMStateTuple(b_c, b_m)

            if config.learn_init_state:
                f_init_state, b_init_state = learned_init_states(1)
            else:
                f_init_state = None
                b_init_state = None

            f_lstm_cell = tf.nn.rnn_cell.LSTMCell(config.rnn_hidden_size,
                                                  name='f_lstm_cell_1')
            b_lstm_cell = tf.nn.rnn_cell.LSTMCell(config.rnn_hidden_size,
                                                  name='b_lstm_cell_1')
            f_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                f_lstm_cell,
                state_keep_prob=1. - rnn_state_drop,
                output_keep_prob=1. - rnn_out_drop,
                seed=config.seed)
            b_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                b_lstm_cell,
                state_keep_prob=1. - rnn_state_drop,
                output_keep_prob=1. - rnn_out_drop,
                seed=config.seed)

            # f_outputs / b_outputs of this first layer are also consumed by
            # the auxiliary POS heads further down.
            (f_outputs, b_outputs), _ = bidirectional_dynamic_rnn(
                f_lstm_cell,
                b_lstm_cell,
                lstm_input,
                dtype=tf.float32,
                initial_state_fw=f_init_state,
                initial_state_bw=b_init_state)

            def merge_mode(forward, backward):
                # Combine the two directions according to config.merge_mode.
                if config.merge_mode == 'ave':
                    outputs = tf.reduce_mean(tf.stack([forward, backward],
                                                      axis=0),
                                             axis=0)
                elif config.merge_mode == 'concat':
                    outputs = tf.concat([forward, backward],
                                        axis=-1,
                                        name='rnn_layer_outputs')
                elif config.merge_mode == 'sum':
                    outputs = tf.reduce_sum(tf.stack([forward, backward],
                                                     axis=0),
                                            axis=0)
                else:
                    raise ValueError(
                        f'Unknown merge_mode: {config.merge_mode!r}')
                return outputs

            outputs = merge_mode(f_outputs, b_outputs)

            def make_cell(size, name):
                # Build a (forward, backward) pair of dropout-wrapped cells.
                f_cell = tf.nn.rnn_cell.LSTMCell(size, name='f_' + name)
                b_cell = tf.nn.rnn_cell.LSTMCell(size, name='b_' + name)
                # rnn_out_drop is a drop rate, so the keep probability is its
                # complement, exactly as in the first layer above.
                f_cell = tf.nn.rnn_cell.DropoutWrapper(
                    f_cell,
                    output_keep_prob=1. - rnn_out_drop,
                    state_keep_prob=1. - rnn_state_drop)
                b_cell = tf.nn.rnn_cell.DropoutWrapper(
                    b_cell,
                    output_keep_prob=1. - rnn_out_drop,
                    state_keep_prob=1. - rnn_state_drop)
                return (f_cell, b_cell)

            extra_rnn_layers = config.n_rnn_layers - 1
            if extra_rnn_layers > 0:
                if config.learn_init_state:
                    f_init_state, b_init_state = learned_init_states(2)
                    # Every stacked layer starts from the same learned state.
                    f_init_state = tuple([f_init_state] * extra_rnn_layers)
                    b_init_state = tuple([b_init_state] * extra_rnn_layers)
                else:
                    f_init_state = None
                    b_init_state = None

                cells = [
                    make_cell(config.rnn_hidden_size,
                              name=f'lstm_cell_{i + 2}')
                    for i in range(extra_rnn_layers)
                ]
                f_cells = [fw for (fw, _bw) in cells]
                b_cells = [bw for (_fw, bw) in cells]
                f_cell = tf.nn.rnn_cell.MultiRNNCell(f_cells,
                                                     state_is_tuple=True)
                b_cell = tf.nn.rnn_cell.MultiRNNCell(b_cells,
                                                     state_is_tuple=True)

                (f_rnn_outputs, b_rnn_outputs), _ = bidirectional_dynamic_rnn(
                    f_cell,
                    b_cell,
                    outputs,
                    dtype=tf.float32,
                    initial_state_fw=f_init_state,
                    initial_state_bw=b_init_state)
                outputs = merge_mode(
                    f_rnn_outputs,
                    b_rnn_outputs)  # [bs, seq_len, rnn_size (2 * rnn_size)]

        with tf.variable_scope('after_lstm'):
            rnn_output_size = config.rnn_hidden_size if config.merge_mode != 'concat' else (
                config.rnn_hidden_size * 2)

            outputs = self.dense_layer(rnn_output_size, config.dense_size,
                                       'dense_post_rnn', outputs)
            outputs = tf.nn.dropout(outputs, keep_prob=1. - dense_drop)
            outputs = tf.contrib.layers.batch_norm(inputs=outputs,
                                                   updates_collections=None)
            outputs = tf.nn.relu(outputs)

        if config.use_pos_lm:
            # Auxiliary heads: the forward outputs feed the 'next_pos' head
            # and the backward outputs feed the 'pred_pos' head.
            with tf.variable_scope('next_pos'):
                self.next_pos_target = tf.placeholder(dtype=tf.int32,
                                                      shape=[None, None])
                next_pos = self.dense_layer(config.rnn_hidden_size,
                                            config.dense_size,
                                            'dense_next_pos', f_outputs,
                                            'relu')
                next_pos = self.dense_layer(
                    config.dense_size,
                    self.grammeme_vectorizer_output.pos_count() + 1,
                    'next_pos', next_pos, 'softmax')
                next_pos_loss = sequence_loss(logits=next_pos,
                                              targets=self.next_pos_target,
                                              weights=self.weights,
                                              average_across_timesteps=False,
                                              average_across_batch=False,
                                              name='next_pos_loss')
                next_pos_loss = tf.reshape(next_pos_loss, shape=[-1])
                next_pos_loss *= weights
                self.next_pos_loss = tf.reduce_sum(next_pos_loss)
                self.next_pos_loss_avg = self.next_pos_loss / total
                tf.summary.scalar('next_pos_loss__', self.next_pos_loss_avg)

            with tf.variable_scope('pred_pos'):
                self.pred_pos_target = tf.placeholder(dtype=tf.int32,
                                                      shape=[None, None])
                pred_pos = self.dense_layer(config.rnn_hidden_size,
                                            config.dense_size,
                                            'dense_pred_pos', b_outputs,
                                            'relu')
                pred_pos = self.dense_layer(
                    config.dense_size,
                    self.grammeme_vectorizer_output.pos_count() + 1,
                    'pred_pos', pred_pos, 'softmax')

                pred_pos_loss = sequence_loss(logits=pred_pos,
                                              targets=self.pred_pos_target,
                                              weights=self.weights,
                                              average_across_timesteps=False,
                                              average_across_batch=False,
                                              name='pred_pos_loss')
                pred_pos_loss = tf.reshape(pred_pos_loss, shape=[-1])
                pred_pos_loss *= weights
                self.pred_pos_loss = tf.reduce_sum(pred_pos_loss)
                self.pred_pos_loss_avg = self.pred_pos_loss / total
                tf.summary.scalar('pred_pos_loss__', self.pred_pos_loss_avg)

        with tf.variable_scope('main_pred'):
            self.target = tf.placeholder(dtype=tf.int32, shape=[None, None])
            # NOTE(review): the head is built with activation 'softmax' and
            # then passed as `logits` to sequence_loss -- confirm what
            # dense_layer does with that activation name.
            outputs = self.dense_layer(
                config.dense_size,
                self.grammeme_vectorizer_output.get_size() + 1, 'main_pred',
                outputs, 'softmax')
            main_loss = sequence_loss(logits=outputs,
                                      targets=self.target,
                                      weights=self.weights,
                                      average_across_timesteps=False,
                                      average_across_batch=False,
                                      name='main_loss')
            main_loss = tf.reshape(main_loss, shape=[-1])
            main_loss *= weights
            self.main_loss = tf.reduce_sum(main_loss)
            self.main_loss_avg = self.main_loss / total
            tf.summary.scalar('main_loss__', self.main_loss_avg)
            targets = tf.reshape(self.target, shape=[-1])
            predictions = tf.cast(tf.reshape(tf.argmax(outputs, axis=-1),
                                             shape=[-1]),
                                  dtype=tf.int32)
            correct = tf.cast(tf.equal(predictions, targets), dtype=tf.float32)
            self.correct = tf.reduce_sum(correct * weights)
            # The small epsilon avoids a division by zero when all weights
            # are zero.
            self.accuracy = divide(self.correct,
                                   total + 1e-12,
                                   name='accuracy')
            tf.summary.scalar('accuracy__', self.accuracy)

        self.summaries = tf.summary.merge_all()

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        if self.is_training:
            with tf.control_dependencies(update_ops):

                def get_optimizer(build_config, lr):
                    # Map the configured optimizer name to a tf.train optimizer.
                    optimizer_name = build_config.optimizer.lower()
                    if optimizer_name == 'adam':
                        optimizer = tf.train.AdamOptimizer(lr)
                    elif optimizer_name == 'sgd':
                        optimizer = tf.train.GradientDescentOptimizer(lr)
                    elif optimizer_name == 'rmsprop':
                        optimizer = tf.train.RMSPropOptimizer(lr)
                    elif optimizer_name == 'adagrad':
                        optimizer = tf.train.AdagradOptimizer(lr)
                    else:
                        raise ValueError(
                            f'Unknown optimizer: {build_config.optimizer!r}')
                    return optimizer

                optimizer = get_optimizer(self.build_config, self.lr)

                trainable_variables = tf.trainable_variables()
                loss = tf.constant(0.0, dtype=tf.float32)
                if config.use_pos_lm:
                    loss += self.pred_pos_loss_avg + self.next_pos_loss_avg
                loss += self.main_loss_avg
                if config.use_wd:
                    self.wd = tf.get_variable(name='weight_decay',
                                              initializer=config.wd,
                                              trainable=False)
                    l2_loss = tf.constant(0.0, dtype=tf.float32)
                    for var in tf.trainable_variables():
                        l2_loss += tf.nn.l2_loss(var)
                    loss = loss + l2_loss * self.wd

                grads = tf.gradients(loss, trainable_variables)
                if self.build_config.clip_norm is not None:
                    self.clip_norm = tf.get_variable(
                        name='clip_norm',
                        initializer=self.build_config.clip_norm,
                        trainable=False)
                    # tf.gradients yields None for variables the loss does not
                    # depend on; leave those untouched instead of crashing in
                    # clip_by_norm.
                    grads = [
                        grad if grad is None else tf.clip_by_norm(
                            grad, self.clip_norm) for grad in grads
                    ]
                self.train_op = optimizer.apply_gradients(
                    zip(grads, trainable_variables),
                    global_step=self.global_step,
                    name='train_op')

        # Checkpoints hold lr, global_step and every trainable variable,
        # keyed by op name.
        self.variables_to_save = {
            'lr': self.lr,
            'global_step': self.global_step
        }
        self.variables_to_save.update(
            {var.op.name: var for var in tf.trainable_variables()})
        self.saver = tf.train.Saver(self.variables_to_save)
예제 #8
0
    def __call__(self, input, state, scope=None):
        """Run one step of the conditional variational recurrent cell.

        Args:
            input: 2-D tensor that is split along axis 1 into columns of
                widths ``[n_x, n_y, n_z, n_z, 1]``: the observation ``x``, the
                condition ``y``, an additive prior-mean term, an additive
                prior-sigma term, and a per-row train flag.
            state: 4-tuple ``(c, m, last_prior_mu, last_prior_sigma)``.
            scope: optional variable-scope name; defaults to the class name.

        Returns:
            A pair ``(cell_output, new_state)``. ``cell_output`` is the
            axis-1 concatenation of ``enc_mu, enc_sigma, dec_mu, dec_sigma,
            dec_x, prior_mu, prior_sigma, z_encoder``. ``new_state`` is a
            ``TPPCVRNNStateTuple`` holding the updated LSTM ``c``/``h`` and
            the current prior ``mu``/``sigma``.
        """
        with tf.variable_scope(scope or type(self).__name__):
            # TODO: why shall we apply c instead of m
            c, m, last_prior_mu, last_prior_sigma = state
            x, y, last_input_prior_mu, last_input_prior_sigma, train_flag_ph = \
                tf.split(value=input,
                         num_or_size_splits=[self.n_x, self.n_y, self.n_z, self.n_z, 1], axis=1)
            # Squeeze only the feature axis: a bare tf.squeeze would also
            # collapse the batch axis when the batch size is 1 and loses the
            # static rank when the batch size is unknown.
            train_flag_ph = tf.cast(tf.squeeze(train_flag_ph, axis=1), tf.bool)

            with tf.variable_scope("phi_y"):
                y_phi = linear(y, self.n_h)

            # Prior over z: the previous prior parameters plus a learned
            # delta computed from y and the recurrent output m.
            with tf.variable_scope("Prior"):
                with tf.variable_scope("hidden"):
                    prior_hidden = tf.nn.relu(
                        linear(tf.concat(values=[y_phi, m], axis=1),
                               self.n_prior_hidden))
                with tf.variable_scope("delta_mu"):
                    delta_prior_mu = linear(prior_hidden, self.n_z)
                with tf.variable_scope("mu"):
                    prior_mu = delta_prior_mu + last_prior_mu + last_input_prior_mu
                    # last_input_prior_mu is a zero matrix except the first time step
                with tf.variable_scope("delta_sigma"):
                    delta_prior_sigma = linear(prior_hidden, self.n_z)
                with tf.variable_scope("sigma"):
                    prior_sigma = delta_prior_sigma + last_prior_sigma + last_input_prior_sigma
                    # last_input_prior_sigma is a zero matrix except the first time step
                    prior_sigma = tf.nn.softplus(prior_sigma)

            # Feature extraction for x conditioned on y. Note that y is
            # projected again here under its own variable scope.
            with tf.variable_scope("cond_x"):
                xy = tf.concat(values=(x, linear(y, self.n_h)), axis=1)
            with tf.variable_scope("phi_x"):
                xy_phi = tf.nn.relu(linear(xy, self.n_h))

            # Approximate posterior (encoder) over z.
            with tf.variable_scope("Encoder"):
                with tf.variable_scope("hidden"):
                    enc_hidden = tf.nn.relu(
                        linear(tf.concat(axis=1, values=(xy_phi, m)),
                               self.n_enc_hidden))
                with tf.variable_scope("mu"):
                    enc_mu = linear(enc_hidden, self.n_z)
                with tf.variable_scope("sigma"):
                    enc_sigma = tf.nn.softplus(linear(enc_hidden, self.n_z))

            # Reparameterisation: z = mu + sigma * epsilon. The same noise
            # sample is shared by the encoder and the prior draw.
            eps1 = tf.random_normal((tf.shape(x)[0], self.n_z),
                                    0.0,
                                    1.0,
                                    dtype=tf.float32)
            z_encoder = tf.add(enc_mu, tf.multiply(enc_sigma, eps1))
            z_prior = tf.add(prior_mu, tf.multiply(prior_sigma, eps1))
            with tf.variable_scope("cond_z"):
                # Rows whose train flag is set use the encoder sample, the
                # others use the prior sample.
                z = tf.where(train_flag_ph, x=z_encoder, y=z_prior)
                zy = tf.concat(values=(z, linear(y, self.n_h)), axis=1)
            with tf.variable_scope("Phi_z"):
                zy_phi = tf.nn.relu(linear(zy, self.n_h))

            with tf.variable_scope("Decoder"):
                with tf.variable_scope("hidden"):
                    dec_hidden_enc = tf.nn.relu(
                        linear(tf.concat(axis=1, values=(zy_phi, m)),
                               self.n_dec_hidden))
                with tf.variable_scope("mu"):
                    dec_mu = linear(dec_hidden_enc, self.n_x)
                with tf.variable_scope("sigma"):
                    dec_sigma = tf.nn.softplus(linear(dec_hidden_enc,
                                                      self.n_x))
                with tf.variable_scope("rho"):
                    # dec_rho is not part of the returned output; the call is
                    # kept so the "rho" variables are still created.
                    dec_rho = tf.nn.sigmoid(linear(dec_hidden_enc, self.n_x))

            # Sample a reconstruction of x from the decoder distribution.
            eps2 = tf.random_normal((tf.shape(x)[0], self.n_x),
                                    0.0,
                                    1.0,
                                    dtype=tf.float32)
            dec_x = tf.add(dec_mu, tf.multiply(dec_sigma, eps2))

            # Advance the recurrent state; only the state update is used.
            output, state_update = self.lstm(
                tf.concat(axis=1, values=(xy_phi, zy_phi)),
                LSTMStateTuple(c, m))
            # TODO: recheck it
        cell_output = tf.concat(values=(enc_mu, enc_sigma, dec_mu, dec_sigma,
                                        dec_x, prior_mu, prior_sigma,
                                        z_encoder),
                                axis=1)
        c_update, m_update = state_update
        tpp_cvrnn_state = TPPCVRNNStateTuple(c=c_update,
                                             h=m_update,
                                             mu_p=prior_mu,
                                             sigma_p=prior_sigma)
        return cell_output, tpp_cvrnn_state
예제 #9
0
파일: GW_VAE.py 프로젝트: youngflyasd/TSCI
# Script fragment wiring together a sequence VAE: encoder -> latent sample ->
# decoder, followed by the KL and reconstruction loss terms.
dic_embeddings = tf.constant(dic_em())

# Encoder
encode_outputs, encode_states, z_mean, z_stddev, new_states = get_encoder_layer(
    encoder_embed_input, keep_prob)
# VAE latent sample via the reparameterisation trick.
# Despite its name, z_stddev is used as a log-variance below: exp(0.5 * z_stddev)
# is the standard deviation here and exp(z_stddev) is the variance in the KL term.

samples = tf.random_normal(tf.shape(z_stddev))
z = z_mean + tf.exp(z_stddev * 0.5) * samples
# Decoder
# Initial decoder state is derived from the latent sample z.
h_state = tf.nn.softplus(tf.matmul(z, weights_de['w_']) +
                         biases_de['b_'])  # alternative activation: tf.nn.relu

# NOTE(review): if this is TensorFlow's LSTMStateTuple(c, h), h_state fills the
# `c` slot and the encoder value fills `h` -- confirm this is intended.
decoder_initial_state = LSTMStateTuple(h_state, encode_states[0][1])

decoder_output, predicting_logits, training_logits, masks, target = get_decoder_layer(
    z, decoder_embed_input, decoder_initial_state, keep_prob, is_train)
# Per-example KL divergence between N(z_mean, exp(z_stddev)) and N(0, 1),
# summed over axis 1.
latent_loss = 0.5 * tf.reduce_sum(
    tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean), 1)
# Individual KL terms, kept commented out (debugging aid):
# a=tf.reduce_sum(tf.exp(z_stddev),1)
# b=tf.reduce_sum(z_stddev,1)
# c=tf.reduce_sum(tf.square(z_mean),1)
# Mean KL over the batch, unscaled and scaled by latentscale_iter.
latent_cost = tf.reduce_mean(latent_loss)
laten_ = latentscale_iter * tf.reduce_mean(latent_loss)

# Reconstruction term: sequence loss of the training logits against the
# targets, weighted by `masks`.
encropy_loss = tf.contrib.seq2seq.sequence_loss(training_logits, target, masks)
cost = tf.reduce_mean(
    tf.contrib.seq2seq.sequence_loss(training_logits, target, masks) +
예제 #10
0
    def call(self, inputs, state):
        """Run one step of the lexicon-aware LSTM cell.

        Args:
          inputs: indexable pair; `inputs[0]` is the character input and
            `inputs[1]` the lexicon-word state input for the same step.
          state: `(c, h)` tuple when `self._state_is_tuple` is true, otherwise a
            single tensor that is split in half along axis 1 into `c` and `h`.

        Returns:
          `(new_h, new_state)`, where `new_state` is `LSTMStateTuple(new_c, new_h)`
          or the concatenation `[new_c, new_h]` along axis 1, mirroring the
          format of the incoming `state`.
        """
        char_inputs = inputs[0]  # shape = [batch_size, input_dimension]
        state_inputs = inputs[
            1]  # shape = [batch_size, max_num_of_lexicon words, lexicon_state_dimension]

        # check whether the last dimension of state_inputs are all zero.
        # check_state_0 should be in the shape of [batch_size, max_num_of_lexicon words]
        check_state_0 = tf.reduce_sum(state_inputs, axis=-1)
        # check_state_1 should be in the shape of [batch_size]
        check_state_1 = tf.reduce_sum(check_state_0, axis=-1)

        # Find the indices of entries that have a lexicon match so that only those
        # are processed, avoiding computation on the many entries with no match.
        # state_inputs_indices_for_lexicon should be in the shape of [batch_size, 2]
        # NOTE(review): tf.where yields one row per non-zero entry, so the leading
        # dimension is the number of matches rather than batch_size — confirm.
        state_inputs_indices_for_lexicon = tf.where(
            tf.not_equal(check_state_0, 0))

        # Find the indices of batch rows that have no lexicon match at all, again
        # to keep unmatched entries out of the more expensive lexicon path.
        # tf.where(tf.equal(check_state_1, 0)) should be in the shape of [batch_size, 1]
        # state_inputs_indices_for_not_lexicon should be in the shape of [batch_size]
        state_inputs_indices_for_not_lexicon = tf.squeeze(
            tf.where(tf.equal(check_state_1, 0)))

        # Select the cell states without lexicon matches. This mainly handles the
        # single-match case: squeeze collapses it to a rank-0 scalar, so a
        # dimension has to be added back.
        # in case `[i]` is squeezed to scalar `i`, change it back to 1-dimension tensor `[i]` by `tf.expand_dims()`
        # otherwise, `[]` and `[i, j]` will remain as-is after tf.squeeze() and further conversion on it
        state_inputs_indices_for_not_lexicon = tf.cond(
            pred=tf.equal(tf.rank(state_inputs_indices_for_not_lexicon), 0),
            true_fn=lambda: tf.expand_dims(
                state_inputs_indices_for_not_lexicon, axis=0),
            false_fn=lambda: state_inputs_indices_for_not_lexicon)

        # Indices of characters that have a lexicon match.
        # char_inputs_indices_for_lexicon should be in the shape of [batch_size, 1]
        char_inputs_indices_for_lexicon = tf.where(
            tf.not_equal(check_state_1, 0))

        # Indices of characters that have no lexicon match.
        # char_inputs_indices_for_not_lexicon should be in the shape of [batch_size, 1]
        char_inputs_indices_for_not_lexicon = tf.where(
            tf.equal(check_state_1, 0))

        if self._state_is_tuple:
            c, h = state
        else:
            c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

        # tf.concat([char_inputs, h], 1) should be in the shape of
        # [batch_size, char_embedding_size + state_dimension]
        # h should be in the shape of [batch_size, state_dimension]
        # self._kernel should be in the shape of [char_embedding_size + state_dimension, X]
        # gate_inputs should be in the shape of [batch_size, 4 * state_dimension]
        gate_inputs = tf.matmul(tf.concat([char_inputs, h], 1), self._kernel)
        gate_inputs = tf.nn.bias_add(gate_inputs, self._bias)

        # Four equal slices along axis 1, named i, j, f, o in that order.
        i, j, f, o = tf.split(value=gate_inputs, num_or_size_splits=4, axis=1)

        # Cell-state update for the rows without lexicon matches, then written
        # into self._char_state_tensor at those rows.
        new_c_without_lexicon = self._new_c_without_lexicon(
            i=i,
            f=f,
            j=j,
            c=c,
            indices_tensor=state_inputs_indices_for_not_lexicon)
        new_c = tf.scatter_nd_update(
            self._char_state_tensor,
            indices=char_inputs_indices_for_not_lexicon,
            updates=new_c_without_lexicon)

        # Only take the lexicon path when the number of no-match rows differs from
        # the batch size, i.e. when at least one row has a lexicon match.
        new_c = tf.cond(tf.not_equal(
            tf.shape(state_inputs_indices_for_not_lexicon)[-1],
            tf.shape(state_inputs)[0]),
                        true_fn=lambda: self._if_not_empty_lexicon_state(
                            i, j, char_inputs, state_inputs,
                            char_inputs_indices_for_lexicon,
                            state_inputs_indices_for_lexicon, new_c),
                        false_fn=lambda: new_c)

        # Compute the output hidden state.
        new_h = tf.multiply(self._activation(new_c), tf.nn.sigmoid(o))

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], 1)

        return new_h, new_state
예제 #11
0
def model_fn(features, labels, mode, params, config):
    """Estimator model function for a character-level autoregressive RNN.

    In TRAIN and EVAL mode the ground-truth characters in `features` are fed
    back step by step and a log-probability loss is minimised. In any other
    mode the network feeds its own samples back and the sampled character
    ids are returned as predictions.

    Args:
      features: in TRAIN/EVAL a 4-tuple `(X_s, X_l, X_r, X_u)`; only `X_s`
        (character ids) and `X_l` (compared against the time step to decide
        when a sequence is finished) are used here. Unused otherwise.
      labels: unused.
      mode: a `ModeKeys` value.
      params: object exposing `num_char`, `num_hidden`, `cell`
        (`'lstm'` or `'sru'`), `learning_rate`, and, for inference,
        `infer_batch_size` and `infer_seq_length`.
      config: unused.

    Returns:
      A `tf.estimator.EstimatorSpec`.
    """
    vocab_size = params.num_char
    # TRAIN and EVAL share the teacher-forced path; everything else samples.
    teacher_forced = mode in (ModeKeys.TRAIN, ModeKeys.EVAL)

    if teacher_forced:
        X_s, X_l, X_r, X_u = features
        batch_dim = tf.shape(X_s)[0]
        time_dim = tf.shape(X_s)[1]

        Xs_embd = tf.one_hot(X_s, vocab_size)
        # Time-major TensorArray so step t can be read with a single index.
        X_ta = tf.TensorArray(size=time_dim, dtype=tf.float32).unstack(
            _transpose_batch_time(Xs_embd), 'TBD_Formatted_X')
    else:
        batch_dim = params.infer_batch_size
        time_dim = params.infer_seq_length

    cell_factories = {
        'lstm': lambda: LSTMCell(params.num_hidden),
        'sru': lambda: SRUCell(params.num_hidden),
    }
    acell = cell_factories[params.cell]()

    # Linear projection onto the vocabulary (no activation).
    dense_kwargs = dict(units=vocab_size, name='out_to_character')

    with tf.variable_scope('Shared_Dense', reuse=False) as dense_layer_scope:
        # Dummy call that only creates the dense variables; real cell
        # outputs are pushed through the same variables via get_logits().
        dummy_input = tf.zeros([1, acell.output_size])
        tf.layers.dense(dummy_input, **dense_kwargs)

    def get_logits(cell_out):
        """Project cell outputs with the shared dense layer."""
        with tf.variable_scope(dense_layer_scope, reuse=True):
            return tf.layers.dense(cell_out, **dense_kwargs)

    def get_dist(cell_out):
        """Categorical distribution over characters for the given outputs."""
        return Categorical(logits=get_logits(cell_out),
                           name='categorical_dist',
                           allow_nan_stats=False,
                           dtype=tf.int32)

    def get_sample(cell_out):
        """Draw a character and return it one-hot encoded."""
        drawn = get_dist(cell_out).sample()
        return tf.one_hot(drawn, vocab_size)

    def get_prob(cell_out, obs):
        """Probability the distribution assigns to the observed ids."""
        return get_dist(cell_out).prob(obs)

    with tf.variable_scope('Initial_State'):
        # One learned initial row per state, tiled across the batch.
        h_row = tf.get_variable('init_state_h', [1, params.num_hidden],
                                initializer=tf.random_uniform_initializer(0))
        h_init = tf.tile(h_row, [batch_dim, 1])
        c_row = tf.get_variable('init_state_c', [1, params.num_hidden],
                                initializer=tf.random_uniform_initializer(0))
        c_init = tf.tile(c_row, [batch_dim, 1])
        cell_init_state = LSTMStateTuple(c_init, h_init)

        first_step = tf.zeros(shape=[batch_dim, vocab_size],
                              dtype=tf.float32,
                              name='first_character')

    with tf.name_scope('NADE'):
        fed_inputs_ta = tf.TensorArray(size=time_dim, dtype=tf.float32)

        def loop_fn(time, cell_output, cell_state, loop_state):
            """raw_rnn callback; the loop state records every input fed."""
            if cell_output is None:
                # Initial call: start from the learned state and a zero input.
                upcoming_state = cell_init_state
                upcoming_input = first_step
                upcoming_loop_state = fed_inputs_ta
            else:
                upcoming_state = cell_state
                if teacher_forced:
                    upcoming_input = X_ta.read(time - 1)
                else:
                    upcoming_input = get_sample(cell_output)
                upcoming_loop_state = loop_state.write(time - 1,
                                                       upcoming_input)

            if teacher_forced:
                done = (time >= X_l)
            else:
                done = (time >= time_dim)

            # cell_output is emitted unchanged (it is None on the first call).
            return (done, upcoming_input, upcoming_state, cell_output,
                    upcoming_loop_state)

        output_ta, _, loop_state_ta = tf.nn.raw_rnn(acell, loop_fn)

    with tf.name_scope('Output'):
        outputs = _transpose_batch_time(output_ta.stack())
        logits = get_logits(outputs)

    if not teacher_forced:
        fed_batch_major = _transpose_batch_time(loop_state_ta.stack())
        X_sampled = tf.argmax(fed_batch_major, axis=2)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=X_sampled)

    observed_prob = get_prob(outputs, X_s)
    logp_loss = -tf.reduce_mean(tf.log(1e-6 + observed_prob))
    # Only monitored through the logging hook, which looks the op up by name.
    xentropy_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=Xs_embd,
                                                logits=logits),
        name='xtropy_loss')

    rmsprop = tf.train.RMSPropOptimizer(learning_rate=params.learning_rate)
    train_op = rmsprop.minimize(loss=logp_loss,
                                global_step=tf.train.get_global_step())

    logging_hook = tf.train.LoggingTensorHook(
        tensors={"xtropy_loss": "xtropy_loss"}, every_n_iter=100)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=logp_loss,
                                      train_op=train_op,
                                      training_chief_hooks=[logging_hook])
예제 #12
0
def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
                   target_dict_dim, is_generating, beam_size,
                   max_generation_length):
    """Build an attention-based encoder/decoder translation graph.

    The source sentence is embedded and encoded with a bidirectional LSTM.
    The decoder is an `LSTMCellWithSimpleAttention` attending over the
    encoder outputs; it is driven by a `TrainingHelper` when training and by
    beam search when generating.

    Args:
      embedding_dim: width of the source and target word embeddings.
      encoder_size: number of units of each encoder LSTM direction.
      decoder_size: number of units of the decoder LSTM.
      source_dict_dim: source vocabulary size.
      target_dict_dim: target vocabulary size.
      is_generating: build the beam-search graph when true, otherwise the
        training graph.
      beam_size: beam width (used only when generating).
      max_generation_length: maximum number of decode steps when generating.

    Returns:
      `(feeds, result)`. `feeds` is a dict of the placeholders to feed.
      `result` is the scalar training loss when `is_generating` is false and
      the beam-search `predicted_ids` otherwise.
    """
    src_word_idx = tf.placeholder(tf.int32, shape=[None, None])
    src_sequence_length = tf.placeholder(tf.int32, shape=[
        None,
    ])

    src_embedding_weights = tf.get_variable("source_word_embeddings",
                                            [source_dict_dim, embedding_dim])
    src_embedding = tf.nn.embedding_lookup(src_embedding_weights, src_word_idx)

    src_forward_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_size)
    src_reversed_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_size)
    # no peephole
    encoder_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=src_forward_cell,
        cell_bw=src_reversed_cell,
        inputs=src_embedding,
        sequence_length=src_sequence_length,
        dtype=tf.float32)

    # concat the forward outputs and backward outputs; the feature width is
    # therefore 2 * encoder_size
    encoded_vec = tf.concat(encoder_outputs, axis=2)

    # project the encoder outputs to size of decoder lstm.
    # The flattening width must be the encoder output width (2 * encoder_size),
    # not the embedding width: the two only coincide when
    # embedding_dim == encoder_size.
    encoded_proj = tf.contrib.layers.fully_connected(
        inputs=tf.reshape(encoded_vec, shape=[-1, encoder_size * 2]),
        num_outputs=decoder_size,
        activation_fn=None,
        biases_initializer=None)
    encoded_proj_reshape = tf.reshape(
        encoded_proj, shape=[-1, tf.shape(encoded_vec)[1], decoder_size])

    # get init state for decoder lstm's H from the first time step of the
    # backward encoder output (width encoder_size, for the same reason as above)
    backword_first = tf.slice(encoder_outputs[1], [0, 0, 0], [-1, 1, -1])
    decoder_boot = tf.contrib.layers.fully_connected(
        inputs=tf.reshape(backword_first, shape=[-1, encoder_size]),
        num_outputs=decoder_size,
        activation_fn=tf.nn.tanh,
        biases_initializer=None)

    # prepare the initial state for decoder lstm: zero cell state, booted H
    cell_init = tf.zeros(tf.shape(decoder_boot), tf.float32)
    initial_state = LSTMStateTuple(cell_init, decoder_boot)

    # create decoder lstm cell; when generating, everything the attention
    # reads is tiled beam_size times
    decoder_cell = LSTMCellWithSimpleAttention(
        decoder_size,
        encoded_vec if not is_generating else seq2seq.tile_batch(
            encoded_vec, beam_size),
        encoded_proj_reshape if not is_generating else seq2seq.tile_batch(
            encoded_proj_reshape, beam_size),
        src_sequence_length if not is_generating else seq2seq.tile_batch(
            src_sequence_length, beam_size),
        forget_bias=0.0)

    output_layer = Dense(target_dict_dim, name='output_projection')

    if not is_generating:
        trg_word_idx = tf.placeholder(tf.int32, shape=[None, None])
        trg_sequence_length = tf.placeholder(tf.int32, shape=[
            None,
        ])
        trg_embedding_weights = tf.get_variable(
            "target_word_embeddings", [target_dict_dim, embedding_dim])
        trg_embedding = tf.nn.embedding_lookup(trg_embedding_weights,
                                               trg_word_idx)

        training_helper = seq2seq.TrainingHelper(
            inputs=trg_embedding,
            sequence_length=trg_sequence_length,
            time_major=False,
            name='training_helper')

        training_decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                                helper=training_helper,
                                                initial_state=initial_state,
                                                output_layer=output_layer)

        # get the max length of target sequence
        max_decoder_length = tf.reduce_max(trg_sequence_length)

        decoder_outputs_train, _, _ = seq2seq.dynamic_decode(
            decoder=training_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_decoder_length)

        decoder_logits_train = tf.identity(decoder_outputs_train.rnn_output)
        # Not returned; kept because it adds the named op 'decoder_pred_train'
        # to the graph.
        decoder_pred_train = tf.argmax(decoder_logits_train,
                                       axis=-1,
                                       name='decoder_pred_train')
        masks = tf.sequence_mask(lengths=trg_sequence_length,
                                 maxlen=max_decoder_length,
                                 dtype=tf.float32,
                                 name='masks')

        # place holder of label sequence
        lbl_word_idx = tf.placeholder(tf.int32, shape=[None, None])

        # compute the loss
        loss = seq2seq.sequence_loss(logits=decoder_logits_train,
                                     targets=lbl_word_idx,
                                     weights=masks,
                                     average_across_timesteps=True,
                                     average_across_batch=True)

        # return feeding list and loss operator
        return {
            'src_word_idx': src_word_idx,
            'src_sequence_length': src_sequence_length,
            'trg_word_idx': trg_word_idx,
            'trg_sequence_length': trg_sequence_length,
            'lbl_word_idx': lbl_word_idx
        }, loss
    else:
        start_tokens = tf.ones([
            tf.shape(src_word_idx)[0],
        ], tf.int32) * START_TOKEN_IDX
        # share the same embedding weights with target word
        trg_embedding_weights = tf.get_variable(
            "target_word_embeddings", [target_dict_dim, embedding_dim])

        inference_decoder = beam_search_decoder.BeamSearchDecoder(
            cell=decoder_cell,
            embedding=lambda tokens: tf.nn.embedding_lookup(
                trg_embedding_weights, tokens),
            start_tokens=start_tokens,
            end_token=END_TOKEN_IDX,
            initial_state=tf.nn.rnn_cell.LSTMStateTuple(
                tf.contrib.seq2seq.tile_batch(initial_state[0], beam_size),
                tf.contrib.seq2seq.tile_batch(initial_state[1], beam_size)),
            beam_width=beam_size,
            output_layer=output_layer)

        decoder_outputs_decode, _, _ = seq2seq.dynamic_decode(
            decoder=inference_decoder,
            output_time_major=False,
            #impute_finished=True,# error occurs
            maximum_iterations=max_generation_length)

        predicted_ids = decoder_outputs_decode.predicted_ids

        return {
            'src_word_idx': src_word_idx,
            'src_sequence_length': src_sequence_length
        }, predicted_ids
예제 #13
0
def Model(_abnormal_data, _abnormal_label, _hidden_num, _elem_num, _file_name,
          _partition):
    """Build an ensemble recurrent autoencoder graph and its training op.

    `ensemble_space` encoders read the same input placeholder; their final
    states are linearly mapped, concatenated (and optionally compressed) into
    one shared state that initialises `ensemble_space` decoders. The loss is
    the mean squared sum of the decoders' reconstruction differences plus an
    L1 penalty on the shared state.

    Besides its arguments the function reads these module-level names:
    `batch_num`, `ensemble_space`, `cell_type`, `compress`,
    `decode_without_input`, `reverse` and `learning_rate`.

    Args:
      _abnormal_data: array-like; only `.shape[1]` and `.shape[2]` are read,
        to size the input placeholder.
      _abnormal_label: not used in this function.
      _hidden_num: number of units of each recurrent cell.
      _elem_num: output width of the projection layer and of the inference
        helper's samples.
      _file_name: forwarded to the RKLSTMCell / RSLSTMCell constructors.
      _partition: forwarded to the RKLSTMCell / RSLSTMCell constructors.

    Returns:
      `(g, p_input, d_dec, loss, optimizer, saver)`: the new graph, the input
      placeholder, the dict of decoder outputs/states, the loss tensor, the
      op returned by `AdamOptimizer.minimize` (not the optimizer object), and
      a `tf.train.Saver` created inside the graph.
    """
    tf.reset_default_graph()
    g = tf.Graph()
    with g.as_default():
        # placeholder list
        p_input = tf.placeholder(tf.float32,
                                 shape=(batch_num, _abnormal_data.shape[1],
                                        _abnormal_data.shape[2]))
        # p_inputs = [tf.squeeze(t, [1]) for t in tf.split(p_input, _abnormal_data.shape[1], 1)]

        # Regularizer signature
        l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.005,
                                                          scope=None)

        # Projection layer (shared by every decoder in the helper-driven branch)
        projection_layer = tf.layers.Dense(units=_elem_num, use_bias=True)

        # with tf.device('/device:GPU:0'):
        d_enc = {}
        with tf.variable_scope('encoder'):
            for j in range(ensemble_space):
                # create RNN cell
                # NOTE(review): pure_enc_cell is only bound for cell_type 1 or 2
                # and residual_enc_cell only for cell_type 1, yet j == 0 and
                # j == 1 below use them unconditionally — other cell_type values
                # look like they would fail with an unbound local; confirm.
                if cell_type == 0:
                    enc_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                if cell_type == 1:
                    pure_enc_cell = LSTMCell(_hidden_num)
                    residual_enc_cell = RLSTMCell(_hidden_num)
                    # enc_cell = RSLSTMCell(_hidden_num, file_name=_file_name, type='enc', partition=_partition,
                    #                       component=j, reuse=tf.AUTO_REUSE)
                    enc_cell = RKLSTMCell(_hidden_num,
                                          file_name=_file_name,
                                          type='enc',
                                          partition=_partition,
                                          component=j,
                                          reuse=tf.AUTO_REUSE)
                if cell_type == 2:
                    pure_enc_cell = GRUCell(_hidden_num)
                    enc_cell = RSGRUCell(_hidden_num)
                # Ensemble member 0 uses the plain cell, member 1 the residual
                # cell, every further member the RK/RS variant.
                if j == 0:
                    d_enc['enc_output_{0}'.format(j)], d_enc[
                        'enc_state_{0}'.format(j)] = tf.nn.dynamic_rnn(
                            pure_enc_cell, p_input, dtype=tf.float32)

                elif j == 1:
                    d_enc['enc_output_{0}'.format(j)], d_enc[
                        'enc_state_{0}'.format(j)] = tf.nn.dynamic_rnn(
                            residual_enc_cell, p_input, dtype=tf.float32)

                else:
                    d_enc['enc_output_{0}'.format(j)], d_enc[
                        'enc_state_{0}'.format(j)] = tf.nn.dynamic_rnn(
                            enc_cell, p_input, dtype=tf.float32)

            # shared_state_c = tf.concat([d_enc['enc_state_{0}'.format(j)].c for j in range(ensemble_space)], axis=1)
            # shared_state_h = tf.concat([d_enc['enc_state_{0}'.format(j)].h for j in range(ensemble_space)], axis=1)
            # One affine map for all c states and one for all h states (both
            # zero-initialised); the mapped states are concatenated on axis 1.
            # The `.c` / `.h` accesses require every encoder state to expose
            # those attributes.
            w_c = tf.Variable(tf.zeros([_hidden_num, _hidden_num]))
            b_c = tf.Variable(tf.zeros([_hidden_num]))
            w_h = tf.Variable(tf.zeros([_hidden_num, _hidden_num]))
            b_h = tf.Variable(tf.zeros([_hidden_num]))
            shared_state_c = tf.concat([
                tf.matmul(d_enc['enc_state_{0}'.format(j)].c, w_c) + b_c
                for j in range(ensemble_space)
            ],
                                       axis=1)
            shared_state_h = tf.concat([
                tf.matmul(d_enc['enc_state_{0}'.format(j)].h, w_h) + b_h
                for j in range(ensemble_space)
            ],
                                       axis=1)

            if compress:
                # The same Dense layer object is applied to both c and h.
                compress_state = tf.layers.Dense(units=_hidden_num,
                                                 activation=tf.tanh,
                                                 use_bias=True)
                shared_state_c = compress_state(shared_state_c)
                shared_state_h = compress_state(shared_state_h)

            shared_state = LSTMStateTuple(shared_state_c, shared_state_h)

        # with tf.device('/device:GPU:1'):
        d_dec = {}
        with tf.variable_scope('decoder') as vs:
            if decode_without_input:
                # Decoders are unrolled by dynamic_rnn over an all-zero input
                # that has the same shape as p_input.
                dec_input = tf.zeros(
                    [p_input.shape[0], p_input.shape[1], p_input.shape[2]],
                    dtype=tf.float32)
                for k in range(ensemble_space):
                    # create RNN cell
                    # Without compression the shared state is the plain
                    # concatenation, so cells are ensemble_space times wider.
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num)
                            dec_cell = RSLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space)
                            dec_cell = RSLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    if k == 0:
                        d_dec['dec_output_{0}'.format(k)], d_dec[
                            'dec_state_{0}'.format(k)] = tf.nn.dynamic_rnn(
                                pure_dec_cell,
                                dec_input,
                                initial_state=shared_state,
                                dtype=tf.float32)
                    elif k == 1:
                        d_dec['dec_output_{0}'.format(k)], d_dec[
                            'dec_state_{0}'.format(k)] = tf.nn.dynamic_rnn(
                                residual_dec_cell,
                                dec_input,
                                initial_state=shared_state,
                                dtype=tf.float32)
                    else:
                        d_dec['dec_output_{0}'.format(k)], d_dec[
                            'dec_state_{0}'.format(k)] = tf.nn.dynamic_rnn(
                                dec_cell,
                                dec_input,
                                initial_state=shared_state,
                                dtype=tf.float32)

                    # NOTE(review): [::-1] reverses along the FIRST axis of the
                    # dynamic_rnn output; if that output is batch-major this
                    # flips the batch order rather than time — confirm.
                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

            else:
                # Decoders are unrolled with an InferenceHelper: each step's
                # projected output is fed back as the next input, starting
                # from an all-zero first input.
                dec_input = tf.zeros([p_input.shape[0], p_input.shape[2]],
                                     dtype=tf.float32)
                for k in range(ensemble_space):
                    # create RNN cell
                    if cell_type == 0:
                        dec_cell = tf.nn.rnn_cell.BasicRNNCell(_hidden_num)
                    if cell_type == 1:
                        if compress:
                            pure_dec_cell = LSTMCell(_hidden_num)
                            residual_dec_cell = RLSTMCell(_hidden_num)
                            # dec_cell = RSLSTMCell(_hidden_num, file_name=_file_name, type='dec', partition=_partition,
                            #                       component=k, reuse=tf.AUTO_REUSE)
                            dec_cell = RKLSTMCell(_hidden_num,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                        else:
                            pure_dec_cell = LSTMCell(_hidden_num *
                                                     ensemble_space)
                            residual_dec_cell = RLSTMCell(_hidden_num *
                                                          ensemble_space)
                            # dec_cell = RSLSTMCell(_hidden_num * ensemble_space, file_name=_file_name, type='dec',
                            #                       partition=_partition, component=k, reuse=tf.AUTO_REUSE)
                            dec_cell = RKLSTMCell(_hidden_num * ensemble_space,
                                                  file_name=_file_name,
                                                  type='dec',
                                                  partition=_partition,
                                                  component=k,
                                                  reuse=tf.AUTO_REUSE)
                    if cell_type == 2:
                        if compress:
                            pure_dec_cell = GRUCell(_hidden_num)
                            dec_cell = RSGRUCell(_hidden_num)
                        else:
                            pure_dec_cell = GRUCell(_hidden_num *
                                                    ensemble_space)
                            dec_cell = RSGRUCell(_hidden_num * ensemble_space)

                    # sample_fn is the identity and end_fn always returns
                    # False, so decoding stops only at maximum_iterations.
                    inference_helper = tf.contrib.seq2seq.InferenceHelper(
                        sample_fn=lambda outputs: outputs,
                        sample_shape=[_elem_num],
                        sample_dtype=tf.float32,
                        start_inputs=dec_input,
                        end_fn=lambda sample_ids: False)
                    if k == 0:
                        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                            pure_dec_cell,
                            inference_helper,
                            shared_state,
                            output_layer=projection_layer)
                    elif k == 1:
                        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                            residual_dec_cell,
                            inference_helper,
                            shared_state,
                            output_layer=projection_layer)
                    else:
                        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                            dec_cell,
                            inference_helper,
                            shared_state,
                            output_layer=projection_layer)

                    # Decode for as many steps as p_input has along axis 1.
                    d_dec['dec_output_{0}'.format(
                        k)], _, _ = tf.contrib.seq2seq.dynamic_decode(
                            inference_decoder,
                            impute_finished=True,
                            maximum_iterations=p_input.shape[1])

                    # NOTE(review): here the stored value is the first result of
                    # dynamic_decode, not a plain tensor; [::-1] reverses the
                    # order of whatever sequence that object exposes — confirm
                    # this reverses time as intended.
                    if reverse:
                        d_dec['dec_output_{0}'.format(k)] = d_dec[
                            'dec_output_{0}'.format(k)][::-1]

        # Sum the reconstruction differences of all ensemble members.
        # NOTE(review): [0] takes the first element/slice of each decoder
        # output before subtracting p_input; what that selects differs between
        # the two decoder branches above (and after `reverse`) — confirm.
        sum_of_difference = 0
        for i in range(ensemble_space):
            sum_of_difference += d_dec['dec_output_{0}'.format(i)][0] - p_input

        loss = tf.reduce_mean(tf.square(sum_of_difference))
        # L1 penalty; the shared state tuple is passed as the single entry of
        # the weights list.
        regularization_penalty = tf.contrib.layers.apply_regularization(
            l1_regularizer, [shared_state])
        loss = loss + regularization_penalty
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)
        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
    return g, p_input, d_dec, loss, optimizer, saver
예제 #14
0
    def call(self, skel_inputs, state):
        '''Run one step of the part-aware skeleton LSTM cell.

        The 25 skeleton joints are grouped into five body parts. Each part
        keeps its own cell state with its own input, forget and candidate
        gates, while a single output gate, computed from all parts and the
        previous hidden state, produces the shared hidden state.

        Joint groups (1-based joint numbers; 1, 2 and 21 are in every part):
          head:  [ 3, 4,              1,2,21]
          r_arm: [ 5, 6, 7, 8,22,23,  1,2,21]
          l_arm: [ 9,10,11,12,24,25,  1,2,21]
          r_leg: [13,14,15,16,        1,2,21]
          l_leg: [17,18,19,20,        1,2,21]

        Args:
          skel_inputs: tensor reshapeable to (batch_size, 25, 3); for
            Kinect 2.0 that is (batch_size, 75).
          state: (cs, h) when `self._state_is_tuple` is true, where `cs` holds
            the five part cell states concatenated along axis 1; otherwise a
            single tensor split in half along axis 1 into `cs` and `h`.

        Returns:
          (new_h, new_state); `new_state` is `LSTMStateTuple(new_cs, new_h)`
          or their concatenation along axis 1, matching the input format.
        '''

        sigmoid = math_ops.sigmoid
        tanh = math_ops.tanh

        if self._state_is_tuple:
            cs, h = state
        else:
            cs, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        # one cell state per body part
        cs = array_ops.split(cs, num_or_size_splits=5, axis=1)

        divide_config = {
            'head': (3, 4, 1, 2, 21),
            'r_arm': (5, 6, 7, 8, 22, 23, 1, 2, 21),
            'l_arm': (9, 10, 11, 12, 24, 25, 1, 2, 21),
            'r_leg': (13, 14, 15, 16, 1, 2, 21),
            'l_leg': (17, 18, 19, 20, 1, 2, 21)
        }
        # Explicit order: it fixes which slice of `cs` and which
        # weight_%d / bias_%d variables belong to which body part.
        part_order = ('head', 'r_arm', 'l_arm', 'r_leg', 'l_leg')

        reshaped_input = array_ops.reshape(skel_inputs, shape=(-1, 25, 3))
        # joint numbers are 1-based, hence the `- 1`
        joint_groups = [[
            reshaped_input[:, each - 1, :] for each in divide_config[part]
        ] for part in part_order]

        # Each group is stacked to (n_joints, batch, 3), moved to batch-major
        # and flattened to (batch, 3 * n_joints). The flattened width is
        # spelled out from the static joint count so the batch dimension may
        # be unknown at graph-construction time.
        body_list = [
            array_ops.reshape(array_ops.transpose(stacked, perm=(1, 0, 2)),
                              shape=(-1, 3 * len(divide_config[part])))
            for part, stacked in zip(part_order,
                                     ops.convert_n_to_tensor(joint_groups))
        ]

        # shared output gate computed from every part plus h
        o_all_skel = _linear(
            [
                body_list[0], body_list[1], body_list[2], body_list[3],
                body_list[4], h
            ],  # here 111 + h_size
            5 * self._num_units,
            True)
        o_all_skel = sigmoid(o_all_skel)
        new_c_list = []
        for ind, each_part in enumerate(body_list):
            concat_p = _linear([each_part, h],
                               3 * self._num_units,
                               weight_name='weight_%d' % ind,
                               bias_name='bias_%d' % ind,
                               bias=True)
            ip, fp, gp = array_ops.split(value=concat_p,
                                         num_or_size_splits=3,
                                         axis=1)
            # The forget bias belongs inside the sigmoid; adding it afterwards
            # makes the forget "gate" exceed 1 for any positive bias, which
            # amplifies the cell state instead of gating it.
            ip = sigmoid(ip)
            fp = sigmoid(fp + self._forget_bias)
            gp = tanh(gp)
            new_c_list.append(cs[ind] * fp + ip * gp)

        new_c_tensors = array_ops.concat(new_c_list, axis=1)
        new_h = o_all_skel * tanh(new_c_tensors)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c_tensors, new_h)
        else:
            new_state = array_ops.concat([new_c_tensors, new_h], 1)

        return new_h, new_state
예제 #15
0
 def state_size(self):
     """Return the (c, h) state sizes; both equal the hidden size."""
     size = self._hidden_size
     return LSTMStateTuple(size, size)
예제 #16
0
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=None,
                 num_proj_shards=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None,
                 reuse=None,
                 name=None,
                 dtype=None):
        """Record the hyper-parameters of the LSTM cell.

        No variables are created here; this only stores the configuration and
        derives `_state_size` / `_output_size` from it.

        Args:
          num_units: int, number of units in the LSTM cell.
          use_peepholes: bool, True enables diagonal/peephole connections.
          cell_clip: (optional) float; if given, the cell state is clipped by
            this value prior to the cell output activation.
          initializer: (optional) initializer for the weight and projection
            matrices.
          num_proj: (optional) int, output dimensionality of the projection.
            A falsy value (None or 0) means no projection.
          proj_clip: (optional) float; if `num_proj > 0` and `proj_clip` is
            given, projected values are clipped elementwise to
            `[-proj_clip, proj_clip]`.
          num_unit_shards: Deprecated; use a variable_scope partitioner.
          num_proj_shards: Deprecated; use a variable_scope partitioner.
          forget_bias: bias added to the forget gate (default 1.0) to reduce
            forgetting at the start of training.  Set to `0.0` manually when
            restoring from CudnnLSTM-trained checkpoints.
          state_is_tuple: if True, states are 2-tuples of `c_state` and
            `m_state`; if False they are concatenated along the column axis
            (a warning is logged in that case).
          activation: activation of the inner states; falls back to
            `math_ops.tanh` when falsy.
          reuse: (optional) bool, forwarded to the base class as `_reuse`.
          name: string, the layer name, forwarded to the base class.
          dtype: default dtype of the layer, forwarded to the base class.
        """
        super(CustomLSTMCell, self).__init__(_reuse=reuse,
                                             name=name,
                                             dtype=dtype)

        shards_requested = (num_unit_shards is not None
                            or num_proj_shards is not None)
        if not state_is_tuple:
            logging.warn(
                "%s: Using a concatenated state is slower and will soon be "
                "deprecated.  Use state_is_tuple=True.", self)
        if shards_requested:
            logging.warn(
                "%s: The num_unit_shards and proj_unit_shards parameters are "
                "deprecated and will be removed in Jan 2017.  "
                "Use a variable scope with a partitioner instead.", self)

        # Inputs must be 2-dimensional.
        self.input_spec = base_layer.InputSpec(ndim=2)

        # Plain configuration, stored verbatim.
        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation or math_ops.tanh

        # The emitted output (and the second state entry) has the projection
        # width when a projection is configured, otherwise the cell width.
        output_width = num_proj if num_proj else num_units
        if state_is_tuple:
            self._state_size = LSTMStateTuple(num_units, output_width)
        else:
            self._state_size = num_units + output_width
        self._output_size = output_width
예제 #17
0
    def call(self, inputs, state):
        """Run one step of the long short-term unitary memory cell (LSTUM).

        The cell state `c` is viewed as a square matrix `C`, updated with the
        forget/input gates, and its slices are then passed through a
        Gram-Schmidt style `while_loop` before the new state is emitted.

        Args:
          inputs: input tensor for this step; handed to `_linear` together
            with `h`.
          state: pair `(c, h)`; `c` is reshaped to
            `[size_batch, hidden_size, hidden_size]`.

        Returns:
          `(new_h, new_state)` with `new_state = LSTMStateTuple(new_c, new_h)`,
          where `new_c` is reshaped to `[size_batch, hidden_size ** 2]` and
          `new_h` to `[size_batch, hidden_size]`.

        NOTE(review): this method still contains interactive debugging
        (`print`, `input()`), unused intermediate values, and a loop body that
        returns its counter unchanged -- see the inline notes.  It looks like
        work in progress; confirm the intended algorithm before relying on it.
        """
        c, h = state
        # View the flat cell state as one square matrix per batch element.
        C = tf.reshape(
            c, [self._size_batch, self._hidden_size, self._hidden_size])

        concat = _linear([inputs, h], 4 * self._hidden_size, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=concat,
                                     num_or_size_splits=4,
                                     axis=1)

        d = sigmoid(i) * tanh(j)
        # `f` and `d` are reshaped to [size_batch, 1, hidden_size] so they
        # broadcast against `C`.  NOTE(review): `f` is used without a sigmoid
        # here -- confirm that is intended.
        e = tf.multiply(
            C, tf.reshape(f, [
                self._size_batch,
                1,
                self._hidden_size,
            ])) + tf.reshape(d, [self._size_batch, 1, self._hidden_size])
        # NOTE(review): values derived from this step's inputs are wrapped in
        # new `tf.Variable`s on every call -- confirm this is intended.
        e_l = tf.Variable(tf.unstack(e, axis=2))  # 128 128 128
        # Seed list for the loop: the L2-normalised first slice of `e_l`.
        bList = tf.Variable([tf.nn.l2_normalize(e_l[0], 1)])  # 1 128 128
        # Debug output.
        print(type(tf.shape(bList)[0]))
        #Gram-Schmidt loop
        # From here on `i` is the loop counter and no longer the input gate.
        i = tf.constant(0)
        loop_vars = [bList, i]
        # The leading dimension of the list is left unknown because the loop
        # body appends to it.
        shape_inv = [
            tf.TensorShape([None, self._size_batch, self._hidden_size]),
            i.get_shape()
        ]
        cond = lambda b_l, i: tf.less(i, self._size_batch)

        def F(b_l, i):
            # `array` is only used by the debug print below.
            TensorArr = tf.TensorArray(tf.float32,
                                       1,
                                       dynamic_size=True,
                                       infer_shape=False)
            array = TensorArr.unstack(b_l)
            print(array.read(0))

            # NOTE(review): this first `b_u` is overwritten below before it
            # is ever read.
            b_u = tf.unstack(
                tf.reshape(
                    b_l,
                    [tf.shape(b_l)[0], self._size_batch, self._hidden_size]))
            # NOTE(review): `input()` waits for a line on stdin whenever this
            # Python body runs -- looks like a leftover debugging pause.
            input()

            # NOTE(review): the leading dimension (`i + 1`) is a tensor, so
            # `tf.unstack` may be unable to infer how many pieces to produce
            # -- confirm this builds.
            b_u = tf.unstack(
                tf.reshape(b_l, [i + 1, self._size_batch, self._hidden_size]))
            dot = b_l[i] * b_u
            reduce_dot_prime = tf.reduce_sum(dot, axis=2)
            reduce_dot_final = tf.reduce_sum(b_l * reduce_dot_prime, axis=0)

            # Subtract the accumulated term from slice `i` and re-normalise.
            w_n = e_l[i] - reduce_dot_final
            w_n = tf.nn.l2_normalize(w_n, 1, epsilon=1e-8)
            # Append the new vector to the running list.
            b_l = tf.concat([
                b_l,
                tf.reshape(w_n, [1, self._size_batch, self._hidden_size])
            ], 0)
            # NOTE(review): `i` is returned unchanged, so `cond` sees the same
            # value on every iteration -- presumably this should advance the
            # counter; confirm.
            return b_l, i

        b_list, _ = control_flow_ops.while_loop(cond, F, loop_vars, shape_inv)

        # Debug output; the bare `input` below is a no-op expression.
        print(b_list)
        input

        new_C = tf.stack(b_list, axis=1)

        # Here `o` is the output-gate slice from the split above.
        o = tf.reshape(o, [self._size_batch, self._hidden_size, 1])
        new_h = tf.matmul(self._activation(new_C), o)
        new_h = tf.reshape(new_h, [self._size_batch, self._hidden_size])
        # Flatten the matrix state back to the stored layout.
        new_c = tf.reshape(new_C, [self._size_batch, self._hidden_size**2])
        new_state = LSTMStateTuple(new_c, new_h)

        return new_h, new_state
예제 #18
0
    def call(self, inputs, state):
        """Advance the LSTM by one step, feeding back a softmax read-out.

        Besides `inputs` and the previous hidden state, the gate
        pre-activations also see `y_prev`, a softmax computed from the
        previous hidden state with `self._y_w` / `self._y_b`.

        Args:
          inputs: 2-D input Tensor (rank 2 is enforced).
          state: if `state_is_tuple` is False, a single 2-D state Tensor that
            is sliced into `c` (first `num_units` columns) and `h` (the next
            `num_proj` columns); otherwise a `(c, h)` tuple of 2-D Tensors.

        Returns:
          A tuple `(h, new_state)`:
          - `h`: the cell output after reading `inputs`; projected with
            `self._proj_kernel` when `num_proj` is set.
          - `new_state`: `LSTMStateTuple(c, h)` or the column-wise
            concatenation of `c` and `h`, matching `state_is_tuple`.

        Raises:
          ValueError: If the input size cannot be inferred from the static
            shape of `inputs`.
        """
        sigmoid = math_ops.sigmoid
        activation = self._activation

        if self._num_proj is None:
            num_proj = self._num_units
        else:
            num_proj = self._num_proj

        if self._state_is_tuple:
            c_prev, h_prev = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            h_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        # Softmaxed read-out of the previous hidden state.
        # NOTE(review): `_y_w` is the left matmul operand here; confirm its
        # shape against the code that creates it.
        y_logits = math_ops.matmul(self._y_w, h_prev) + self._y_b
        y_prev = softmax(y_logits)

        gate_inputs = array_ops.concat([inputs, h_prev, y_prev], 1)
        lstm_matrix = nn_ops.bias_add(
            math_ops.matmul(gate_inputs, self._kernel), self._bias)

        # Pre-activations, in order: input gate, new input, forget gate,
        # output gate.
        in_pre, cand_pre, forget_pre, out_pre = array_ops.split(
            value=lstm_matrix, num_or_size_splits=4, axis=1)

        if self._use_peepholes:
            # Diagonal connections: the gates also look at the cell state.
            forget_gate = sigmoid(forget_pre + self._forget_bias +
                                  self._w_f_diag * c_prev)
            retained = forget_gate * c_prev
            input_gate = sigmoid(in_pre + self._w_i_diag * c_prev)
        else:
            retained = sigmoid(forget_pre + self._forget_bias) * c_prev
            input_gate = sigmoid(in_pre)
        c = retained + input_gate * activation(cand_pre)

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type

        if self._use_peepholes:
            output_gate = sigmoid(out_pre + self._w_o_diag * c)
        else:
            output_gate = sigmoid(out_pre)
        h = output_gate * activation(c)

        if self._num_proj is not None:
            h = math_ops.matmul(h, self._proj_kernel)
            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                h = clip_ops.clip_by_value(h, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, h)
        else:
            new_state = array_ops.concat([c, h], 1)
        return h, new_state
예제 #19
0
    def call(self, inputs, state):
        """Run one step of the time-decay LSTM.

        Column 0 of `inputs` is read as the time gap `delt_t`; the remaining
        columns are the feature (text) vector.  The cell state is updated in
        two stages: a time-decay stage weighted by `exp(-delt_t)`, followed by
        a regular gated update from the features.

        Args:
          inputs: 2-D tensor; `inputs[:, 0:1]` is the time gap and
            `inputs[:, 1:]` the features.
          state: `(c0, h0)` when `state_is_tuple` is true, otherwise one
            tensor that is split into two equal halves along axis 1.

        Returns:
          `(new_h0, new_state)`, where `new_state` is
          `LSTMStateTuple(new_c0, new_h0)` or their concatenation along
          axis 1, matching `state_is_tuple`.
        """
        sigmoid, tanh = math_ops.sigmoid, math_ops.tanh

        if not self._state_is_tuple:
            c0, h0 = array_ops.split(value=state, num_or_size_splits=2, axis=1)
        else:
            c0, h0 = state

        # Features first, then the time gap (kept as a column for
        # broadcasting against the state).
        inputs_x = inputs[:, 1:]
        delt_t = inputs[:, 0:1]

        # Three independent linear maps of [features, h0]; each lives in its
        # own variable scope.
        with tf.variable_scope('1'):
            # Time-decay branch: input, candidate and forget pre-activations.
            time_gates = _linear([inputs_x, h0], 3 * self.num_units, bias=True)
        with tf.variable_scope('2'):
            # Text branch: input, candidate and forget pre-activations.
            text_gates = _linear([inputs_x, h0], 3 * self.num_units, bias=True)
        with tf.variable_scope('3'):
            # Output-gate pre-activation.
            output_x = _linear([inputs_x, h0], self.num_units, bias=True)

        i_time, j_time, f_time = array_ops.split(value=time_gates,
                                                 num_or_size_splits=3,
                                                 axis=1)
        i_text, j_text, f_text = array_ops.split(value=text_gates,
                                                 num_or_size_splits=3,
                                                 axis=1)

        # Stage 1: the old memory is scaled by exp(-delt_t) and the new
        # contribution by 1 - exp(-delt_t).
        decayed_memory = (c0 * math_ops.exp(-1 * delt_t) *
                          sigmoid(f_time + self._forget_bias))
        time_update = ((1 - math_ops.exp(-1 * delt_t)) * sigmoid(i_time) *
                       tanh(j_time))
        new_c0 = decayed_memory + time_update

        # Stage 2: standard gated update driven by the text branch.
        new_c0 = (new_c0 * sigmoid(f_text + self._forget_bias) +
                  sigmoid(i_text) * tanh(j_text))

        new_h0 = tanh(new_c0) * sigmoid(output_x)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c0, new_h0)
        else:
            new_state = array_ops.concat([new_c0, new_h0], 1)
        return new_h0, new_state
예제 #20
0
    def call(self, inputs, state):
        """Run one step of the time-aware LSTM with an attention-gated update.

        The last three columns of `inputs` are per-example scalars that are
        split off before the gate computation: `inputs[:, -3]` is the
        "time last" score, `inputs[:, -2]` the "time now" score and
        `inputs[:, -1]` the attention score.  The remaining columns are the
        regular LSTM input.

        Variables are created lazily on the first call, inside the variable
        scope that is current at that moment.

        Args:
          inputs: 2-D tensor whose trailing three columns are the scores
            described above.
          state: `(c_prev, m_prev)` when `state_is_tuple` is true; otherwise a
            single 2-D tensor holding `c_prev` in its first `num_units`
            columns followed by `m_prev`.

        Returns:
          `(m, new_state)`, where `new_state` is `LSTMStateTuple(c, m)` or the
          column-wise concatenation of `c` and `m`, matching
          `state_is_tuple`.

        Raises:
          ValueError: if the static size of the input feature dimension is
            unknown.
        """
        att_score = tf.expand_dims(inputs[:, -1], -1)
        time_now_score = tf.expand_dims(inputs[:, -2], -1)
        time_last_score = tf.expand_dims(inputs[:, -3], -1)
        inputs = inputs[:, :-3]
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        # An unknown size is a `Dimension` whose `.value` is None on TF1-style
        # shapes and a plain None on TF2-style shapes; accept both.
        if getattr(input_size, "value", input_size) is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")

        if self._time_kernel_w1 is None:
            vector = [self._num_units]
            square = [self._num_units, self._num_units]
            from_input = [input_size, self._num_units]
            # (attribute / variable name, shape).  Each variable is stored on
            # the attribute of the same name; the order is the creation order.
            time_variables = (
                ("_time_input_w1", vector),
                ("_time_input_bias1", vector),
                ("_time_input_w2", vector),
                ("_time_input_bias2", vector),
                ("_time_kernel_w1", from_input),
                ("_time_kernel_t1", square),
                ("_time_bias1", vector),
                ("_time_kernel_w2", from_input),
                ("_time_kernel_t2", square),
                ("_time_bias2", vector),
                ("_o_kernel_t1", square),
                ("_o_kernel_t2", square),
            )
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    for var_name, var_shape in time_variables:
                        setattr(
                            self, var_name,
                            vs.get_variable(var_name,
                                            shape=var_shape,
                                            dtype=dtype))

        # Embed the two scalar time scores into `num_units`-wide features.
        time_now_input = tf.nn.tanh(time_now_score * self._time_input_w1 +
                                    self._time_input_bias1)
        time_last_input = tf.nn.tanh(time_last_score * self._time_input_w2 +
                                     self._time_input_bias2)

        # Pre-activations of the two time gates.
        time_now_state = (
            math_ops.matmul(inputs, self._time_kernel_w1) +
            math_ops.matmul(time_now_input, self._time_kernel_t1) +
            self._time_bias1)
        time_last_state = (
            math_ops.matmul(inputs, self._time_kernel_w2) +
            math_ops.matmul(time_last_input, self._time_kernel_t2) +
            self._time_bias2)

        if self._linear1 is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                if self._num_unit_shards is not None:
                    unit_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_unit_shards))
                self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units,
                                        True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = self._linear1([inputs, m_prev])
        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        # The output gate additionally sees both time features.
        o = (o + math_ops.matmul(time_now_input, self._o_kernel_t1) +
             math_ops.matmul(time_last_input, self._o_kernel_t2))

        # Diagonal connections.  The "not created yet" test is an explicit
        # `is None` (like the other lazy-init guards in this method) so it
        # never evaluates the truthiness of an existing variable.
        if self._use_peepholes and self._w_f_diag is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)

        if self._use_peepholes:
            forget_gate = sigmoid(f + self._forget_bias +
                                  self._w_f_diag * c_prev)
            input_gate = sigmoid(i + self._w_i_diag * c_prev)
        else:
            forget_gate = sigmoid(f + self._forget_bias)
            input_gate = sigmoid(i)
        # The forget path is modulated by the "time last" gate and the input
        # path by the "time now" gate.
        c = (forget_gate * sigmoid(time_last_state) * c_prev +
             input_gate * sigmoid(time_now_state) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        if self._num_proj is not None:
            if self._linear2 is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        if self._num_proj_shards is not None:
                            proj_scope.set_partitioner(
                                partitioned_variables.fixed_size_partitioner(
                                    self._num_proj_shards))
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        # Attention-controlled update: keep `att_score` of the freshly
        # computed state and `1 - att_score` of the previous one.  Blending
        # the new state with itself would reduce to the identity and make the
        # attention score a no-op.
        c = att_score * c + (1.0 - att_score) * c_prev
        m = att_score * m + (1.0 - att_score) * m_prev
        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
예제 #21
0
파일: model.py 프로젝트: yyht/Graph2Seq
    def optimized_gcn_encode(self):
        """Encode the batch graphs with stacked neighbour-aggregation layers.

        Every layer aggregates, for each node, the representations of its
        sampled forward neighbours (and backward neighbours when
        `graph_encode_direction == "bi"`).  Aggregators for layers 0..6 are
        created and appended to `self.fw_aggregators` /
        `self.bw_aggregators`; later layers reuse the aggregator at index 6.

        Returns:
          hidden: ReLU-activated node representations, reshaped per graph to
            `[-1, single_graph_nodes_size, ...]` (forward and backward
            concatenated on the last axis in "bi" mode).
          graph_embedding: `LSTMStateTuple` whose `c` and `h` are both the
            max-pool of `hidden` over the node axis.
        """
        # [node_size, hidden_layer_dim]
        embedded_node_rep = self.encode_node_feature(self.word_embeddings, self.feature_info)

        fw_sampler = UniformNeighborSampler(self.fw_adj_info)
        bw_sampler = UniformNeighborSampler(self.bw_adj_info)
        nodes = tf.reshape(self.batch_nodes, [-1, ])

        # Both directions start from the same initial node embedding.
        # [node_size, dim_size]
        fw_hidden = tf.nn.embedding_lookup(embedded_node_rep, nodes)
        bw_hidden = tf.nn.embedding_lookup(embedded_node_rep, nodes)

        # [node_size, adj_size]
        fw_sampled_neighbors = fw_sampler((nodes, self.sample_size_per_layer))
        bw_sampled_neighbors = bw_sampler((nodes, self.sample_size_per_layer))

        # Placeholders; the real neighbour counts are computed in layer 0.
        fw_sampled_neighbors_len = tf.constant(0)
        bw_sampled_neighbors_len = tf.constant(0)

        is_bidirectional = self.graph_encode_direction == "bi"
        hidden_dim = self.hidden_layer_dim

        for layer in range(self.sample_layer_size):
            first_layer = layer == 0
            share_last_aggregator = layer > 6
            # Layer 0 consumes `hidden_dim`-wide inputs, later layers twice
            # that.
            dim_mul = 1 if first_layer else 2

            # ---- forward direction ----
            if share_last_aggregator:
                fw_aggregator = self.fw_aggregators[6]
            else:
                fw_aggregator = MeanAggregator(dim_mul * hidden_dim, hidden_dim, concat=self.concat, mode=self.mode)
                self.fw_aggregators.append(fw_aggregator)

            # [node_size, adj_size, word_embedding_dim]
            if first_layer:
                fw_neighbors = tf.nn.embedding_lookup(embedded_node_rep, fw_sampled_neighbors)
                # Neighbour count: a neighbour is counted when the sum of the
                # positive parts of its embedding is non-zero.
                fw_activity = tf.reduce_sum(tf.nn.relu(fw_neighbors), axis=2)
                fw_sampled_neighbors_len = tf.reduce_sum(tf.sign(fw_activity), axis=1)
            else:
                # One extra all-zero row is appended after the node rows
                # before the neighbour lookup.
                fw_table = tf.concat([fw_hidden, tf.zeros([1, dim_mul * hidden_dim])], 0)
                fw_neighbors = tf.nn.embedding_lookup(fw_table, fw_sampled_neighbors)

            fw_hidden = fw_aggregator((fw_hidden, fw_neighbors, fw_sampled_neighbors_len))

            if not is_bidirectional:
                continue

            # ---- backward direction (mirrors the forward branch) ----
            if share_last_aggregator:
                bw_aggregator = self.bw_aggregators[6]
            else:
                bw_aggregator = MeanAggregator(dim_mul * hidden_dim, hidden_dim, concat=self.concat, mode=self.mode)
                self.bw_aggregators.append(bw_aggregator)

            if first_layer:
                bw_neighbors = tf.nn.embedding_lookup(embedded_node_rep, bw_sampled_neighbors)
                bw_activity = tf.reduce_sum(tf.nn.relu(bw_neighbors), axis=2)
                bw_sampled_neighbors_len = tf.reduce_sum(tf.sign(bw_activity), axis=1)
            else:
                bw_table = tf.concat([bw_hidden, tf.zeros([1, dim_mul * hidden_dim])], 0)
                bw_neighbors = tf.nn.embedding_lookup(bw_table, bw_sampled_neighbors)

            bw_hidden = bw_aggregator((bw_hidden, bw_neighbors, bw_sampled_neighbors_len))

        # hidden stores the representation for all nodes
        fw_hidden = tf.reshape(fw_hidden, [-1, self.single_graph_nodes_size, 2 * hidden_dim])
        if is_bidirectional:
            bw_hidden = tf.reshape(bw_hidden, [-1, self.single_graph_nodes_size, 2 * hidden_dim])
            hidden = tf.concat([fw_hidden, bw_hidden], axis=2)
        else:
            hidden = fw_hidden

        hidden = tf.nn.relu(hidden)

        # Max-pool over the node axis to get one vector per graph.
        pooled = tf.reduce_max(hidden, 1)
        embedding_dim = (4 if is_bidirectional else 2) * hidden_dim
        graph_embedding = tf.reshape(pooled, [-1, embedding_dim])

        # The same pooled vector is used as both `c` and `h`.
        graph_embedding = LSTMStateTuple(c=graph_embedding, h=graph_embedding)

        # shape of hidden: [batch_size, single_graph_nodes_size, 4 * hidden_layer_dim]
        # shape of graph_embedding: ([batch_size, 4 * hidden_layer_dim], [batch_size, 4 * hidden_layer_dim])
        # (the factor 4 applies to "bi" mode; it is 2 otherwise)
        return hidden, graph_embedding
예제 #22
0
    def __init__(
        self,
        num_units,
        use_peepholes=False,
        cell_clip=None,
        initializer=None,
        num_proj=None,
        proj_clip=None,
        num_unit_shards=None,
        num_proj_shards=None,
        forget_bias=1.0,
        state_is_tuple=True,
        activation=None,
        reuse=None,
    ):
        """Record the configuration of the time-aware LSTM cell.

        No variables are created here; the slots for the lazily built linear
        maps and time/output kernels are only initialised to None.

        Args:
          num_units: number of units in the cell.
          use_peepholes: whether peephole (diagonal) weights are used; the
            `_w_*_diag` slots are only initialised when this is truthy.
          cell_clip: optional clipping value, stored as `_cell_clip`.
          initializer: optional initializer, stored as `_initializer`.
          num_proj: optional projection size; a falsy value means the output
            size equals `num_units`.
          proj_clip: optional clipping value, stored as `_proj_clip`.
          num_unit_shards: deprecated (a warning is logged when set).
          num_proj_shards: deprecated (a warning is logged when set).
          forget_bias: stored as `_forget_bias`.
          state_is_tuple: when false, the state size is a single int
            (`num_units` plus the output width) and a warning is logged.
          activation: inner activation; falls back to `math_ops.tanh`.
          reuse: forwarded to the base class as `_reuse`.
        """
        super(Time4LSTMCell, self).__init__(_reuse=reuse)

        shards_requested = (num_unit_shards is not None
                            or num_proj_shards is not None)
        if not state_is_tuple:
            logging.warn(
                "%s: Using a concatenated state is slower and will soon be "
                "deprecated.  Use state_is_tuple=True.",
                self,
            )
        if shards_requested:
            logging.warn(
                "%s: The num_unit_shards and proj_unit_shards parameters are "
                "deprecated and will be removed in Jan 2017.  "
                "Use a variable scope with a partitioner instead.",
                self,
            )

        # Plain configuration, stored verbatim.
        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation or math_ops.tanh

        # Output (and second state entry) width: the projection size when a
        # projection is configured, otherwise the cell width.
        output_width = num_proj if num_proj else num_units
        if state_is_tuple:
            self._state_size = LSTMStateTuple(num_units, output_width)
        else:
            self._state_size = num_units + output_width
        self._output_size = output_width

        # Slots for objects that are built later; None marks "not built yet".
        lazy_slots = [
            "_linear1",
            "_linear2",
            "_time_input_w1",
            "_time_input_w2",
            "_time_kernel_w1",
            "_time_kernel_t1",
            "_time_bias1",
            "_time_kernel_w2",
            "_time_kernel_t2",
            "_time_bias2",
            "_o_kernel_t1",
            "_o_kernel_t2",
        ]
        if self._use_peepholes:
            lazy_slots += ["_w_f_diag", "_w_i_diag", "_w_o_diag"]
        for slot in lazy_slots:
            setattr(self, slot, None)
예제 #23
0
    def call(self, inputs, state):
        """Run one step of the multiple-input LSTM.

        `inputs` is split column-wise by `self._input_divider`; the first
        four pieces are the mainstream input `y` and three auxiliary inputs
        (`p`, `n`, `i`).  Each piece yields a candidate cell input, all four
        input gates are driven by the mainstream input, and the four gated
        candidates are mixed with softmax attention weights computed against
        the previous cell state.

        Args:
          inputs: `2-D` tensor with shape `[batch_size, input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size, num_units]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size, 2 * num_units]`.

        Returns:
          A pair containing the new hidden state, and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).
        """

        def affine(x, h_prev, w, b):
            # concat([x, h_prev]) @ w + b; per the original shape notes:
            # (B, p+q) x ((p+q), p) -> (B, p).
            projected = matmul(concat([x, h_prev], 1), w)
            return nn_ops.bias_add(projected, b)

        def calc_cell_state_tilde(x, h_prev, w, b):
            # Candidate cell input: tanh of the affine map.
            return tanh(affine(x, h_prev, w, b))

        def calc_input_gate(x, h_prev, w, b):
            # Gate: sigmoid of the affine map.
            return sigmoid(affine(x, h_prev, w, b))

        def calc_pre_attention(cell_input, w_attn, pre_cell_state, b_attn):
            # Attention logit: row-wise sum of (cell_input @ w_attn) times
            # the previous cell state, plus bias, squashed with tanh.
            score = matmul(a=cell_input, b=w_attn)
            score = multiply(score, pre_cell_state)
            score = tf.expand_dims(tf.reduce_sum(score, 1), 1)
            return tanh(nn_ops.bias_add(score, b_attn))

        one = constant_op.constant(1, dtype=dtypes.int32)
        zero = constant_op.constant(0, dtype=dtypes.int32)

        if not self._state_is_tuple:
            c, h = split(value=state, num_or_size_splits=2, axis=one)
        else:
            c, h = state

        # Parameters of all gates are stored in one kernel / one bias and
        # sliced into ten equal parts here.
        # Original author's note: the auxiliary W_cp, W_cn, W_ci and
        # b_cp, b_cn, b_ci are all initialized to 0, so the auxiliary factors
        # are ignored at the very beginning and are hoped to flow in
        # gradually during training under the control of the mainstream.
        (W_f, W_c, W_cp, W_cn, W_ci,
         W_i, W_ip, W_in, W_ii, W_o) = split(value=self._kernel,
                                             num_or_size_splits=10,
                                             axis=one)
        (b_f, b_c, b_cp, b_cn, b_ci,
         b_i, b_ip, b_in, b_ii, b_o) = split(value=self._bias,
                                             num_or_size_splits=10,
                                             axis=zero)

        # Split the inputs into the mainstream and auxiliary pieces.
        pieces = self._input_divider
        input_pieces = split(value=inputs, num_or_size_splits=pieces, axis=one)
        input_y = input_pieces[0]
        input_p = input_pieces[1]
        input_n = input_pieces[2]
        input_i = input_pieces[3]

        # One candidate per input piece.
        candidates = [
            calc_cell_state_tilde(piece, h, w, b)
            for piece, w, b in ((input_y, W_c, b_c), (input_p, W_cp, b_cp),
                                (input_n, W_cn, b_cn), (input_i, W_ci, b_ci))
        ]
        # Every input gate is computed from the mainstream input.
        input_gates = [
            calc_input_gate(input_y, h, w, b)
            for w, b in ((W_i, b_i), (W_ip, b_ip), (W_in, b_in), (W_ii, b_ii))
        ]
        l_t, l_pt, l_nt, l_it = [
            multiply(candidate, gate)
            for candidate, gate in zip(candidates, input_gates)
        ]

        # Attention weights over the four gated candidates.
        w_attn = self._w_attn
        b_attn_t, b_attn_pt, b_attn_nt, b_attn_it = split(
            value=self._b_attn,
            num_or_size_splits=self._input_divider,
            axis=zero)
        attention_logits = [
            calc_pre_attention(gated, w_attn, c, bias)
            for gated, bias in ((l_t, b_attn_t), (l_pt, b_attn_pt),
                                (l_nt, b_attn_nt), (l_it, b_attn_it))
        ]
        attn = tf.nn.softmax(concat(attention_logits, axis=1))
        attn_t, attn_pt, attn_nt, attn_it = split(value=attn,
                                                  num_or_size_splits=4,
                                                  axis=one)

        # Final cell input: attention-weighted sum of the gated candidates.
        l = (multiply(l_t, attn_t) + multiply(l_pt, attn_pt) +
             multiply(l_nt, attn_nt) + multiply(l_it, attn_it))

        # Forget and output gates are those of a regular LSTM and use the
        # mainstream input only.
        f_t = calc_input_gate(input_y, h, W_f, b_f)
        o_t = calc_input_gate(input_y, h, W_o, b_o)

        new_c = multiply(c, f_t) + l
        new_h = multiply(tanh(new_c), o_t)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = concat([new_c, new_h], 1)
        return new_h, new_state
예제 #24
0
    def call(self, inputs, state):
        """Run one step of the long short-term unitary memory cell (LSTUM).

        Args:
          inputs: input tensor for the current step.
          state: pair `(c, h)`. When `self._isMatrix` is set, `c` is reshaped
            to `[size_batch, hidden_size, hidden_size]` before use.

        Returns:
          A pair `(new_h, LSTMStateTuple(new_c, new_h))`.
        """
        batch = self._size_batch
        hidden = self._hidden_size

        c, h = state
        if self._isMatrix:
            C = tf.reshape(c, [batch, hidden, hidden])

        # A single affine map produces all four gate pre-activations:
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        gates = _linear([inputs, h], 4 * hidden, True)
        i, j, f, o = array_ops.split(value=gates, num_or_size_splits=4, axis=1)

        d = sigmoid(i) * tanh(j)
        if self._isMatrix:
            # In matrix mode the candidate is first multiplied by the stored
            # matrix state.
            d_col = tf.reshape(d, [batch, hidden, 1])
            d = tf.reshape(tf.matmul(C, d_col), [batch, hidden])

        # 2x2 block [[cos, -sin], [sin, cos]] built from the cosine between
        # the normalised f and d vectors.
        f_unit = tf.nn.l2_normalize(f, 1, epsilon=1e-8)
        d_unit = tf.nn.l2_normalize(d, 1, epsilon=1e-8)
        cos_theta = tf.reduce_sum(f_unit * d_unit, 1)
        sin_theta = tf.sqrt(1 - cos_theta**2)
        cos_col = tf.reshape(cos_theta, [batch, 1])
        sin_col = tf.reshape(sin_theta, [batch, 1])
        Rth = tf.reshape(
            tf.concat([cos_col, -sin_col, sin_col, cos_col], axis=1),
            [batch, 2, 2])

        # u is the normalised f; v is the normalised part of d that remains
        # after removing its component along u.
        u = f_unit
        d_along_u = tf.reshape(tf.reduce_sum(u * d, 1), [batch, 1])
        v = tf.nn.l2_normalize(d - d_along_u * u, 1, epsilon=1e-8)

        # Stack u and v as rows ([batch, 2, hidden]) and as columns
        # ([batch, hidden, 2]).
        basis_rows = tf.concat(
            [tf.reshape(u, [batch, 1, hidden]),
             tf.reshape(v, [batch, 1, hidden])],
            axis=1)
        basis_cols = tf.transpose(basis_rows, [0, 2, 1])

        # Batched outer products u u^T and v v^T.
        u_col = tf.reshape(u, [batch, hidden, 1])
        uuT = tf.matmul(u_col, tf.transpose(u_col, [0, 2, 1]))
        v_col = tf.reshape(v, [batch, hidden, 1])
        vvT = tf.matmul(v_col, tf.transpose(v_col, [0, 2, 1]))

        # Combine everything into the per-example operator.
        I = tf.eye(hidden, batch_shape=[batch])
        in_plane = tf.matmul(tf.matmul(basis_cols, Rth), basis_rows)
        res = I - uuT - vvT - in_plane

        if self._isMatrix:
            # The operator itself becomes the new (flattened) cell state.
            new_C = res
            o_col = tf.reshape(o, [batch, hidden, 1])
            new_h = tf.reshape(
                tf.matmul(self._activation(new_C), o_col), [batch, hidden])
            new_c = tf.reshape(new_C, [batch, hidden**2])
        else:
            # The operator is applied to the previous cell state vector.
            rotated = tf.matmul(res, tf.reshape(c, [batch, hidden, 1]))
            new_c = tf.reshape(rotated, [batch, hidden])
            new_h = self._activation(new_c) * o

        return new_h, LSTMStateTuple(new_c, new_h)
예제 #25
0
  def call(self, inputs, state):
    """Long short-term memory cell with attention (LSTMA) and label history.

    Args:
      inputs: input tensor for the current step.
      state: 4-tuple `(state, attns, attn_states, history)`. `history` is
        reshaped to `[-1, use_K_histroy, label_emb_size]`.

    Returns:
      A pair `(output, new_wrapper_state)` where `new_wrapper_state` is
      `(LSTMStateTuple(c, h_new), new_attns, new_attn_states, history)` with
      the attention states and the history flattened to 2-D.

    Raises:
      ValueError: if the wrapper was configured with `state_is_tuple=False`.
        A concatenated state carries no history slot and the wrapped cell's
        state is unpacked as a `(c, h)` pair below, so that mode cannot work.
    """
    if not self._state_is_tuple:
      raise ValueError(
          "This attention cell requires state_is_tuple=True: the label "
          "history cannot be recovered from a concatenated state.")

    state, attns, attn_states, history = state

    attn_states = array_ops.reshape(attn_states,
                                    [-1, self._attn_length, self._attn_size])
    input_size = self._input_size
    if input_size is None:
      input_size = inputs.get_shape().as_list()[1]
    # The input projection is created lazily on the first call.
    if self._linear1 is None:
      self._linear1 = _Linear([inputs, attns], input_size, True)
    inputs = self._linear1([inputs, attns])

    cell_output, new_state = self._cell(inputs, state)

    new_state_cat = array_ops.concat(nest.flatten(new_state), 1)
    new_attns, new_attn_states = self._attention(new_state_cat, attn_states)
    with vs.variable_scope("attn_output_projection"):
      if self._linear2 is None:
        self._linear2 = _Linear([cell_output, new_attns], self._attn_size, True)
      output = self._linear2([cell_output, new_attns])

    # Append this step's output to the attention window and flatten it.
    new_attn_states = array_ops.concat(
        [new_attn_states, array_ops.expand_dims(output, 1)], 1)
    new_attn_states = array_ops.reshape(
        new_attn_states, [-1, self._attn_length * self._attn_size])

    c_new, h_new = new_state

    history_len = self.config.use_K_histroy
    emb_size = self.config.label_emb_size

    label_emb = tf.nn.relu(tf.matmul(output, self.emb_M3))

    # Slide the history window: drop the first entry along axis 1 and append
    # the label embedding of the current output.
    pre_history = tf.reshape(history, shape=[-1, history_len, emb_size])
    new_history = tf.slice(pre_history, [0, 1, 0],
                           [-1, history_len - 1, emb_size])
    concat_his = tf.concat(
        [new_history, tf.expand_dims(label_emb, axis=1)], axis=1)

    # The cell state is appended as one more entry before mixing.
    # NOTE(review): this concat and the reshape below assume the width of
    # c_new equals label_emb_size -- confirm against the cell configuration.
    concat_all = tf.concat(
        [concat_his, tf.expand_dims(c_new, axis=1)], axis=1)
    concat_all_flatten = tf.reshape(
        concat_all, shape=[-1, (history_len + 1) * emb_size])
    concat_his_flatten = tf.reshape(
        concat_his, shape=[-1, history_len * emb_size])

    # The new cell state is computed from history + previous cell state.
    c = tf.nn.relu(tf.matmul(concat_all_flatten, self.emb_M4k))

    new_state = LSTMStateTuple(c, h_new)
    new_wrapper_state = (new_state, new_attns, new_attn_states,
                         concat_his_flatten)

    return output, new_wrapper_state
    def call(self, inputs, state):
        """Run one step of the LSTM cell, with optional peepholes and projection.

        Args:
          inputs: tensor whose shape is checked with `with_rank(2)`; the static
            size of its second dimension must be known.
          state: pair `(c_prev, m_prev)`. It is unpacked unconditionally.
            NOTE(review): the returned state is concatenated when
            `_state_is_tuple` is false, but a concatenated input state is not
            split here -- confirm this asymmetry is intended.

        Returns:
          A pair `(m, new_state)`, where `new_state` is `LSTMStateTuple(c, m)`
          when `_state_is_tuple` is set and otherwise `c` and `m` concatenated
          along axis 1.

        Raises:
          ValueError: if the static size of the second dimension of `inputs`
            is unknown.
        """

        # NOTE(review): num_proj is computed but not read again in this method.
        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        (c_prev, m_prev) = state

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        # The gate projection is built lazily on the first call, inside the
        # current variable scope with this cell's initializer (and an optional
        # fixed-size partitioner).
        if self._linear1 is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                if self._num_unit_shards is not None:
                    unit_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_unit_shards))
                self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units,
                                        True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = self._linear1([inputs, m_prev])

        i, j, f, o = array_ops.split(value=lstm_matrix,
                                     num_or_size_splits=4,
                                     axis=1)
        # Diagonal connections
        # The peephole weights are created once; the guard relies on the
        # truthiness of self._w_f_diag to detect that they already exist.
        if self._use_peepholes and not self._w_f_diag:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)

        # New cell state: forget-gated previous state plus input-gated
        # candidate; with peepholes, both gates also see c_prev.
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type
        # Output: gated activation of the new cell state; with peepholes the
        # output gate also sees the new c.
        if self._use_peepholes:
            m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)

        # Optional output projection (no bias), created lazily under the
        # "projection" scope, followed by optional clipping.
        if self._num_proj is not None:
            if self._linear2 is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        if self._num_proj_shards is not None:
                            proj_scope.set_partitioner(
                                partitioned_variables.fixed_size_partitioner(
                                    self._num_proj_shards))
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))
        return m, new_state
예제 #27
0
    def call(self, inputs, state):
        """Run one step of the GraphLSTM cell.

        Args:
          inputs: `2-D` tensor with shape `[batch_size x input_size]`.
          state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
            `True`.  Otherwise, a `Tensor` shaped
            `[batch_size x 2 * self.state_size]`.

        Returns:
          A tuple, containing the new hidden state and the new state (either a
            `LSTMStateTuple` or a concatenated state, depending on
            `state_is_tuple`).

        Raises:
          LookupError: if `self._neighbour_states` has not been set before
            this method runs.
        """
        # Cell weights are initialised before anything else.
        weights = self._init_weights(inputs)

        # Own memory state m_i and hidden state h_i.
        if self._state_is_tuple:
            m_i, h_i = state
        else:
            m_i, h_i = array_ops.split(
                value=state, num_or_size_splits=2, axis=1)

        # Design note kept from the original author: the paper shares the
        # neighbour weights across all nodes so cells generalise to arbitrary
        # superpixel regions; here each cell is meant to specialise on its own
        # joint, making this a "hard wired" Graph LSTM.

        # self._neighbour_states: a list of n `LSTMStateTuples` (m_j, h_j).
        if not hasattr(self, "_neighbour_states"):
            raise LookupError(
                "Could not find variable 'self._neighbour_states' during 'GraphLSTMCell.call'.\n"
                "This likely means 'call' was called directly, instead of through '__call__' (which "
                "should be the case when called from inside the tensorflow framework)."
            )
        # Transpose the list of (m, h) pairs into one tuple of ms and one of hs.
        m_j_all, h_j_all = zip(*self._neighbour_states)

        # IMPLEMENTATION DIFFERS FROM PAPER (original author's note): eq. (2)
        # uses h_j,t whether or not node j was already updated. Here the most
        # recent neighbour state is used instead (h_j,t for non-updated nodes,
        # h_j,t+1 for updated ones), which avoids tracking old states.

        # Eq. 1: averaged hidden states of the neighbouring nodes, h^-_{i,t}.
        h_j_avg = math_ops.reduce_mean(h_j_all, axis=0)

        # Fetch weights and biases, in the same order as the key tuple.
        (w_u, w_f, w_c, w_o,
         u_u, u_f, u_c, u_o,
         u_un, u_fn, u_cn, u_on,
         b_u, b_f, b_c, b_o) = (
             weights[key] for key in (
                 _W_U, _W_F, _W_C, _W_O,
                 _U_U, _U_F, _U_C, _U_O,
                 _U_UN, _U_FN, _U_CN, _U_ON,
                 _B_U, _B_F, _B_C, _B_O))

        own_and_avg = [inputs, h_i, h_j_avg]

        # Eq. 2
        # input gate:
        # g_u = sigmoid ( f_{i,t+1} * W_u + h_{i,t} * U_u + h^-_{i,t} * U_{un} + b_u )
        g_u = math_ops.sigmoid(
            _graphlstm_linear([w_u, u_u, u_un, b_u], own_and_avg))

        # adaptive forget gate, one per neighbour j:
        # g_fij = sigmoid ( f_{i,t+1} * W_f + h_{j,t} * U_fn + b_f )
        g_fij = []
        for h_j in h_j_all:
            g_fij.append(
                math_ops.sigmoid(
                    _graphlstm_linear([w_f, u_fn, b_f], [inputs, h_j])))

        # forget gate:
        # g_fi = sigmoid ( f_{i,t+1} * W_f + h_{i,t} * U_f + b_f )
        g_fi = math_ops.sigmoid(
            _graphlstm_linear([w_f, u_f, b_f], [inputs, h_i]))

        # output gate:
        # g_o = sigmoid ( f_{i,t+1} * W_o + h_{i,t} * U_o + h^-_{i,t} * U_{on} + b_o )
        g_o = math_ops.sigmoid(
            _graphlstm_linear([w_o, u_o, u_on, b_o], own_and_avg))

        # memory gate:
        # g_c = tanh ( f_{i,t+1} * W_c + h_{i,t} * U_c + h^-_{i,t} * U_{cn} + b_c )
        g_c = math_ops.tanh(
            _graphlstm_linear([w_c, u_c, u_cn, b_c], own_and_avg))

        # New memory state: mean over neighbours of (g_fij .* m_j), plus
        # g_fi .* m_i, plus g_u .* g_c.
        gated_neighbour_memory = [
            gate * m_j for gate, m_j in zip(g_fij, m_j_all)
        ]
        m_i_new = (math_ops.reduce_mean(gated_neighbour_memory, axis=0)
                   + g_fi * m_i + g_u * g_c)

        # New hidden state: h_i_new = tanh ( g_o .* m_i_new )
        h_i_new = math_ops.tanh(g_o * m_i_new)

        # Eq. 3 (return values)
        if self._state_is_tuple:
            return h_i_new, LSTMStateTuple(m_i_new, h_i_new)
        return h_i_new, array_ops.concat([m_i_new, h_i_new], 1)
예제 #28
0
                                    dtype=tf.float32,
                                    swap_memory=False,
                                    time_major=True,
                                    scope=None)
)

# Bidirectional encoder: join the forward and backward outputs along axis 2.
encoder_outputs = tf.concat(
    [encoder_fw_outputs, encoder_bw_outputs], axis=2)

# Join the final cell (c) and hidden (h) states of both directions along axis 1.
encoder_final_state_c = tf.concat(
    [encoder_fw_final_state.c, encoder_bw_final_state.c], axis=1)
encoder_final_state_h = tf.concat(
    [encoder_fw_final_state.h, encoder_bw_final_state.h], axis=1)

# Pack the combined forward + backward final states into one state tuple that
# is fed to the decoder.
encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                     h=encoder_final_state_h)

# Decoder: an LSTM (long short-term memory) cell. The batch size is read from
# the dynamic shape of the encoder inputs.
decoder_cell = LSTMCell(decoder_hidden_units)
encoder_max_time, batch_size = tf.unstack(tf.shape(encoder_inputs))

# Add 3 to every sequence length: per the original author's note, 2 additional
# steps plus 1 for the leading end-of-sentence token of the decoder input, so
# the decoder has room for the end-of-sentence token that marks the end of the
# sequence.
decoder_lengths = encoder_inputs_length + 3

# Original author's notes:
# - splitting the data into small batches can improve predictions at a somewhat
#   higher computational cost, though not always;
# - a GRU has fewer gates than an LSTM, so it is cheaper.

# defining weights and biases
예제 #29
0
 def state_size(self):
     """Return the size of this cell's state.

     An `LSTMStateTuple` with two `_num_units` entries when `_state_is_tuple`
     is set, otherwise the single integer `2 * _num_units`.
     """
     units = self._num_units
     if self._state_is_tuple:
         return LSTMStateTuple(units, units)
     return 2 * units
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

        Creates a sequence-to-sequence model with dynamic RNN decoder API.
        In INFER mode, the `c` and `h` tensors of every encoder-state layer are
        wrapped in `tf.Print` so that the encoder state is printed whenever
        the graph is evaluated.

        Args:
          hparams: Hyperparameter configurations.
          scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

        Returns:
          A tuple of the form (logits, loss, final_context_state, sample_id),
          where:
            logits: as returned by `self._build_decoder`.
            loss: the value of `self._compute_loss(logits)`, or `None` in
              INFER mode.
            final_context_state: The final state of decoder RNN.
            sample_id: as returned by `self._build_decoder`.

        Raises:
          ValueError: per the original documentation, if encoder_type differs
            from mono and bi, or attention_option is not (luong |
            scaled_luong | bahdanau | normed_bahdanau). Nothing in this method
            raises it directly; presumably it comes from the encoder/decoder
            builders.
        """
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32
        num_layers = hparams.num_layers
        num_gpus = hparams.num_gpus

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
            # Encoder
            encoder_outputs, encoder_state = self._build_encoder(hparams)

            # When inferring, print the content of `encoder_state`.
            if self.mode == tf.contrib.learn.ModeKeys.INFER:
                summarize_size = 1024 * 16
                first_n_size = -1

                def _print_layer_state(layer_index, layer_state):
                    """Wrap one layer's c and h tensors in `tf.Print`."""
                    return LSTMStateTuple(
                        tf.Print(layer_state.c, [layer_state.c],
                                 "EncodeState%dC = " % layer_index,
                                 first_n=first_n_size,
                                 summarize=summarize_size),
                        tf.Print(layer_state.h, [layer_state.h],
                                 "EncodeState%dH = " % layer_index,
                                 first_n=first_n_size,
                                 summarize=summarize_size))

                # @see http://www.cnblogs.com/rocketfan/p/6257137.html
                # Original author's reading: `encoder_state[0]` is the state of
                # the encoder's hidden layer, `encoder_state[1]` the state of
                # its output layer, and `h` in a `LSTMStateTuple` is the output.
                #
                # Every layer is wrapped (not only the first two), so no layer
                # is dropped from the state handed to the decoder.
                # Assumes encoder_state is a sequence of LSTMStateTuple, one
                # per layer -- TODO confirm for single-layer / non-LSTM setups.
                encoder_state = tuple(
                    _print_layer_state(layer_index, layer_state)
                    for layer_index, layer_state in enumerate(encoder_state))

            ## Decoder
            logits, sample_id, final_context_state = self._build_decoder(
                encoder_outputs, encoder_state, hparams)

            ## Loss
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(
                        model_helper.get_device_str(num_layers - 1, num_gpus)):
                    loss = self._compute_loss(logits)
            else:
                loss = None

            return logits, loss, final_context_state, sample_id