Example #1
    def __init__(self, name: str, env, temp=0.1):
        """
        :param name: string
        :param env: gym env
        :param temp: temperature of boltzmann distribution
        """

        ob_space = env.observation_space
        act_space = env.action_space
        # policy_state = tf.placeholder(tf.float32, [1, 256], name='pi_state')
        # value_state = tf.placeholder(tf.float32, [1, 256],name='v_state')

        with tf.variable_scope(name):
            self.obs = tf.placeholder(dtype=tf.float32,
                                      shape=[None] + list(ob_space.shape),
                                      name='obs')
            rnn_in = tf.expand_dims(self.obs, [0])

            with tf.variable_scope('policy_net'):
                gru_cell = WeightedNormGRUCell(
                    256,
                    activation=nn.relu,
                    kernel_initializer=tf.initializers.orthogonal(),
                    bias_initializer=tf.zeros_initializer())
                outputs, states = nn.dynamic_rnn(gru_cell,
                                                 inputs=rnn_in,
                                                 dtype=tf.float32)
                outputs = tf.reshape(outputs, [-1, 256])
                self.act_probs = dense(outputs, act_space.n, nonlinearity=None)
                self.policy_states = states

            with tf.variable_scope('value_net'):
                gru_cell = WeightedNormGRUCell(
                    256,
                    activation=nn.relu,
                    kernel_initializer=tf.initializers.orthogonal(),
                    bias_initializer=tf.zeros_initializer())
                outputs, states = nn.dynamic_rnn(gru_cell,
                                                 inputs=rnn_in,
                                                 dtype=tf.float32)
                outputs = tf.reshape(outputs, [-1, 256])
                self.v_preds = tf.layers.dense(
                    outputs,
                    units=1,
                    activation=None,
                    kernel_initializer=tf.glorot_normal_initializer(),
                    bias_initializer=tf.zeros_initializer())
                self.value_states = states

            self.act_stochastic = tf.multinomial(tf.nn.log_softmax(
                self.act_probs),
                                                 num_samples=1)
            self.act_stochastic = tf.reshape(self.act_stochastic, shape=[-1])

            self.act_deterministic = tf.argmax(self.act_probs, axis=1)

            self.scope = tf.get_variable_scope().name
Example #2
    def __call__(self, inputs, batch_sz):
        pr_shape = lambda var: print(var.shape)

        if self.rnntype == "GRU":
            print("rnntype: " + self.rnntype)
            cell = nn.rnn_cell.GRUCell(self.n_hidden)
        else:
            print("rnntype: " + self.rnntype)
            cell = nn.rnn_cell.LSTMCell(self.n_hidden)

        initial_state = cell.zero_state(batch_sz, tf.float32)
        # initial_state = cell.zero_state(inputs.shape[0], tf.float32)

        # dynamic_rnn inputs shape = [batch_size, max_time, ...]
        # outs shape = [batch_size, max_time, cell.output_size]
        # states shape = [batch_size, cell.state_size]
        # n_step = int(inputs.shape[1]) # n_step

        outs, states = nn.dynamic_rnn(cell,
                                      inputs,
                                      initial_state=initial_state,
                                      dtype=tf.float32)
        print('outs shape: ')
        pr_shape(outs)  # (batch_sz, max_time, n_hidden)
        # final_state = states[-1] #
        print('states shape: ')
        pr_shape(states)  # (batch_sz, n_hidden)

        FC = tl.Dense(self.n_classes,
                      use_bias=True,
                      kernel_initializer=tc.layers.xavier_initializer(
                          dtype=tf.float32))
        outs = FC(states)

        return outs
Example #3
    def _build_sum(self, cell):
        """generate  user memory states from behavior sequence

        Param: an initiazlied sum cell

        Returns:
            obj: a flatten representation of user memory states, in the shape of (BatchSize, SlotsNum x HiddenSize)
        """
        hparams = self.hparams
        with tf.variable_scope("sum"):
            self.mask = self.iterator.mask
            self.sequence_length = tf.reduce_sum(self.mask, 1)

            rum_outputs, final_state = dynamic_rnn(
                cell,
                inputs=self.history_embedding,
                dtype=tf.float32,
                sequence_length=self.sequence_length,
                scope="sum",
                initial_state=cell.zero_state(
                    tf.shape(self.history_embedding)[0], tf.float32),
            )

            final_state = final_state[:, :hparams.slots * hparams.hidden_size]

            self.heads = cell.heads
            self.alpha = cell._alpha
            self.beta = cell._beta
            tf.summary.histogram("SUM_outputs", rum_outputs)

        return final_state
Example #4
    def _forward_score(self):
        """pass"""
        with tf.variable_scope("fuzzy_crf_forward"):
            first_fea = tf.squeeze(
                tf.slice(self.token_vec_place, [0, 0, 0], [-1, 1, -1]), [1])
            first_tag = tf.squeeze(
                tf.slice(self.tag_place, [0, 0, 0], [-1, 1, -1]),
                [1])  # [batch, num_tag]

            first_state = tf.multiply(first_fea,
                                      tf.cast(first_tag,
                                              tf.float32))  # [batch, num_tag]

            # ===========
            rest_fea = tf.slice(self.token_vec_place, [0, 1, 0], [-1, -1, -1])
            rest_tag = tf.slice(self.tag_place, [0, 1, 0], [-1, -1, -1])
            rest_unk = self._sinpath_mask(
            )  # [batch, max_seq_len - 1, num_tag, num_tag]

            forward_cell = fuzzyCrfForwardCell(self.transitions)
            sequence_lengths_less_one = tf.maximum(
                tf.constant(0, dtype=self.seq_len_place.dtype),
                self.seq_len_place - 1)

            _, scores = dynamic_rnn(cell=forward_cell,
                                    inputs=(rest_fea, rest_tag, rest_unk),
                                    sequence_length=sequence_lengths_less_one,
                                    initial_state=first_state,
                                    dtype=tf.float32)

            self.forward_score = tf.reduce_logsumexp(scores, [1])  # [batch]
Example #5
    def prediction(self):
        num_units = [self._num_RNN, self._num_RNN]
        cells = [nn.rnn_cell.GRUCell(n) for n in num_units]
        stacked_rnn = tf.contrib.rnn.MultiRNNCell(cells)
        # Recurrent network.
        output_RNN, _ = nn.dynamic_rnn(
            stacked_rnn,
            self.data,
            dtype=tf.float32,
            sequence_length=self.length,
        )

        batch_size = tf.shape(output_RNN)[0]
        max_length = int(output_RNN.get_shape()[1])
        output_size = int(output_RNN.get_shape()[2])

        output_reshape = tf.reshape(output_RNN, [-1, output_size])
        weight_l1, bias_l1 = self._weight_and_bias(self._num_RNN, self.layer1)
        output_l1 = tf.nn.tanh(tf.matmul(output_reshape, weight_l1) + bias_l1)
        output_drop_out_l1 = tf.nn.dropout(output_l1, 0.2, seed=47)
        output_drop_out_reshape_l1 = tf.reshape(
            output_drop_out_l1, [batch_size, max_length, self.layer1])

        last = self._last_relevant(output_drop_out_reshape_l1, self.length)

        weight_class, bias_class = self._weight_and_bias(
            self.layer1, int(self.target.get_shape()[1]))
        # Softmax layer.
        prediction = tf.nn.softmax(tf.matmul(last, weight_class) + bias_class)
        return prediction, weight_l1, bias_l1, weight_class, bias_class
Example #6
def build_cell(cell_input, input_len, batch_size):
    cell = nn.rnn_cell.MultiRNNCell(
        [nn.rnn_cell.BasicLSTMCell(LSTMSIZE) for _ in range(LSTMNUM)])
    state_input = cell.zero_state(batch_size, dtype=tf.float32)
    output, state_output = nn.dynamic_rnn(cell, cell_input, input_len,
                                          state_input)
    return output, state_input, state_output
Example #7
def build_rnn(cell, input, input_len, batch_size, name):
    state_input = cell.zero_state(batch_size, dtype=tf.float32)
    output, state_output = nn.dynamic_rnn(cell,
                                          input,
                                          input_len,
                                          state_input,
                                          scope=name)
    return output, state_input, state_output
Example #8
def GRULayer(input_tensor, num_layers=1):
    input_tensor = tf.reshape(
        input_tensor,
        shape=[-1, ConfigUtil.seq_length, ConfigUtil.hidden_size])
    def make_cell():
        gru = GRUCell(num_units=128, kernel_initializer=create_initializer())
        return DropoutWrapper(gru,
                              output_keep_prob=(1 - ConfigUtil.dropout_prob))

    # Each layer needs its own cell instance; reusing one object would share weights.
    cell = (MultiRNNCell([make_cell() for _ in range(num_layers)])
            if num_layers > 1 else make_cell())
    outputs, state = dynamic_rnn(cell, input_tensor, dtype=tf.float32)
    return outputs
Example #9
def build_encoder(input, input_len, embedding, batch_size, training):
    with tf.variable_scope('encoder_pre'):
        input = build_preprocess(input, embedding, training)
    cell = build_cell(training)
    state_input = cell.zero_state(batch_size, dtype=tf.float32)
    output, state_output = nn.dynamic_rnn(cell,
                                          input,
                                          input_len,
                                          state_input,
                                          scope='encoder')
    return output, state_input, state_output
Example #10
    def decode(self, fea_vec, transitions, seq_len):
        """pass"""
        forward_cell = CrfDecodeForwardRnnCell(transitions)

        first_vec = tf.squeeze(tf.slice(fea_vec, [0, 0, 0], [-1, 1, -1]),
                               [1])  # [batch, num_tag]
        rest_vec = tf.squeeze(tf.slice(
            fea_vec, [0, 1, 0], [-1, -1, -1]))  # [batch, len - 1, num_tag]

        sequence_lengths_less_one = tf.maximum(
            tf.constant(0, dtype=seq_len.dtype), seq_len - 1)

        backpointers, scores = dynamic_rnn(
            cell=forward_cell,
            inputs=rest_vec,
            sequence_length=sequence_lengths_less_one,
            initial_state=first_vec,
            dtype=tf.int32)
        backpointers = tf.reverse_sequence(
            backpointers, sequence_lengths_less_one,
            seq_dim=1)  # [batch, len - 1, num_tag]

        # ===================
        num_tags = tf.dimension_value(tf.shape(transitions)[1])
        backward_cell = CrfDecodeBackwardRnnCell(num_tags)

        init_state = tf.cast(tf.argmax(scores, axis=1), dtype=tf.int32)
        init_state = tf.expand_dims(init_state, axis=1)  # [batch, 1]

        decode_tags, _ = dynamic_rnn(cell=backward_cell,
                                     inputs=backpointers,
                                     sequence_length=sequence_lengths_less_one,
                                     dtype=tf.int32)
        decode_tags = tf.squeeze(decode_tags, axis=[-1])  # [batch, len - 1]
        decode_tags = tf.concat([init_state, decode_tags],
                                axis=1)  # [batch, len]
        decode_tags = tf.reverse_sequence(decode_tags, seq_len, seq_dim=1)

        best_score = tf.reduce_max(scores, axis=[1])

        return decode_tags, best_score
Example #11
def build_cell(cell_input, input_len, batch_size, training):
    cell = [nn.rnn_cell.BasicLSTMCell(LSTMSIZE) for _ in range(LSTMNUM)]
    if training:
        cell = [
            nn.rnn_cell.DropoutWrapper(c, output_keep_prob=DROPOUT)
            for c in cell
        ]
    cell = nn.rnn_cell.MultiRNNCell(cell)
    state_input = cell.zero_state(batch_size, dtype=tf.float32)
    output, state_output = nn.dynamic_rnn(cell, cell_input, input_len,
                                          state_input)
    return output, state_input, state_output
Example #12
 def forward(self, x, computation_mode=MakiRestorable.INFERENCE_MODE):
     if self._dynamic:
         dynamic_x = dynamic_rnn(self._cell, x, dtype=tf.float32)
         # hidden states, (last candidate value, last hidden state)
         hs, (c_last, h_last) = dynamic_x
         return hs, c_last, h_last
     else:
         unstack_x = tf.unstack(x, axis=1)
         static_x = static_rnn(self._cell, unstack_x, dtype=tf.float32)
         hs_list, (c_last, h_last) = static_x
         hs = tf.stack(hs_list, axis=1)
         return hs, c_last, h_last
Example #13
 def prediction(self):
     # Recurrent network.
     output_RNN, _ = nn.dynamic_rnn(
         nn.rnn_cell.GRUCell(self._num_RNNs),
         self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )      
     last = self._attention(output_RNN)        
     weight, bias = self._weight_and_bias(
         self._num_RNNs, int(self.target.get_shape()[1]))
     # Softmax layer.
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Example #14
 def prediction(self):
     # Recurrent network.
     output, _ = nn.dynamic_rnn(
         nn.rnn_cell.GRUCell(self._num_hidden),
         self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )
     last = self._last_relevant(output, self.length)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Example #15
 def __init__(self, nfeats, nlabels, hidden_size):
     super(rnn, self).__init__()
     self._nlabels = nlabels
     self._nfeats = nfeats
     self.input = (tf.placeholder(tf.float32,
                                  shape=(None, None, nfeats),
                                  name="data"),
                   tf.placeholder(tf.int32, shape=(None, ), name="lengths"))
     self.y = tf.placeholder(tf.int32, shape=(None, self._nlabels))
     cell = nn.rnn_cell.BasicRNNCell(hidden_size)
     _, self.state = nn.dynamic_rnn(cell,
                                    inputs=self.input[0],
                                    sequence_length=self.input[1],
                                    dtype=tf.float32)
     self.output = layers.dense(inputs=self.state, units=nlabels)
Example #16
 def forward(self, X, is_training=False):
     if self.cell_type == CellType.Bidir_Dynamic:
         return bidirectional_dynamic_rnn(cell_fw=self.cells,
                                          cell_bw=self.cells,
                                          inputs=X,
                                          dtype=tf.float32)
     elif self.cell_type == CellType.Bidir_Static:
         X = tf.unstack(X, num=self.seq_length, axis=1)
         return static_bidirectional_rnn(cell_fw=self.cells,
                                         cell_bw=self.cells,
                                         inputs=X,
                                         dtype=tf.float32)
     elif self.cell_type == CellType.Dynamic:
         return dynamic_rnn(self.cells, X, dtype=tf.float32)
     elif self.cell_type == CellType.Static:
         X = tf.unstack(X, num=self.seq_length, axis=1)
         return static_rnn(self.cells, X, dtype=tf.float32)
Example #17
	def __init__(self, num_classes, batch_size=64, num_steps= 50, sampling = False, num_layers = 2, lstm_size=128):
		# Testing/Training
		batch_size, num_steps = state(sampling, batch_size, num_steps)

		# Define inputs/outputs on graph
		self.inputs, self.targets, self.keep_prob = set_placeholders(batch_size, num_steps)

		#Define LSTM cells in model
		cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

		#one hot encoder
		x_one_hot = tf.one_hot(self.inputs, num_classes)

		#run
		outputs, state = dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
		self.initial_state = state

		#output
		self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
Example #18
def define_rnn(batch_in_tf,
               seq_lens_tf,
               n_sharpe,
               n_time,
               n_ftrs,
               n_markets,
               allow_shorting=True,
               equality=False):
    """ Define a neural net for the Linear regressor.

    Args:
      batch_in_tf (n_batch, n_time, n_ftrs): Input data.
      seq_lens_tf (n_batch): Lengths of each batch sequence. Pad with zeros
        afterwards.
      state_in_tf: Symbolic init state. Can be None or returned by the rnn.
      n_sharpe (float): How many position-outputs to compute.
      n_time (float): Number of timesteps for input data.
      n_ftrs (float): Number of input features.
      W (n_ftrs * (n_time-n_sharpe+1), n_markets): Weight matrix.
      b (n_markets): Biases.
      zero_thr (scalar): Set smaller weights to zero.

    Returns:
      positions (n_batch, n_sharpe, n_markets): Positions for each market.
    """
    lstm_cell = tf_rnn.BasicLSTMCell(num_units=n_markets, state_is_tuple=True)
    cell_state = tf.placeholder(TF_DTYPE, [None, lstm_cell.state_size[0]])
    hidden_state = tf.placeholder(TF_DTYPE, [None, lstm_cell.state_size[0]])
    init_state = tf.contrib.rnn.LSTMStateTuple(cell_state, hidden_state)
    out, state_out_tf = tf_nn.dynamic_rnn(cell=lstm_cell,
                                          inputs=batch_in_tf,
                                          time_major=False,
                                          sequence_length=seq_lens_tf,
                                          initial_state=init_state,
                                          dtype=tf.float32)

    if allow_shorting:
        out = out / tf.reduce_sum(tf.abs(out), axis=2, keep_dims=True)
    else:
        out = tf.pow(out, 2)
        out = out / tf.reduce_sum(out, axis=2, keep_dims=True)
    return out, state_out_tf, (cell_state, hidden_state)
Example #19
    def _build_gru(self):
        """Apply a GRU for modeling.

        Returns:
            obj: The output of GRU section.
        """
        with tf.name_scope("gru"):
            self.mask = self.iterator.mask
            self.sequence_length = tf.reduce_sum(self.mask, 1)
            self.history_embedding = tf.concat(
                [self.item_history_embedding, self.cate_history_embedding], 2)
            rnn_outputs, final_state = dynamic_rnn(
                GRUCell(self.hidden_size),
                inputs=self.history_embedding,
                sequence_length=self.sequence_length,
                dtype=tf.float32,
                scope="gru",
            )
            tf.summary.histogram("GRU_outputs", rnn_outputs)
            return final_state
Example #20
    def _multi_seq_fn():
        """Forward computation of alpha values."""
        rest_of_input = tf.slice(inputs, [0, 1, 0], [-1, -1, -1])

        # Compute the alpha values in the forward algorithm in order to get the
        # partition function.
        forward_cell = CrfForwardRnnCell(transition_params)
        # Sequence length is not allowed to be less than zero.
        sequence_lengths_less_one = tf.maximum(
            tf.constant(0, dtype=sequence_lengths.dtype), sequence_lengths - 1)

        _, alphas = dynamic_rnn(cell=forward_cell,
                                inputs=rest_of_input,
                                sequence_length=sequence_lengths_less_one,
                                initial_state=first_input,
                                dtype=tf.float32)
        log_norm = tf.reduce_logsumexp(alphas, [1])
        # Mask `log_norm` of the sequences with length <= zero.
        log_norm = tf.where(tf.less_equal(sequence_lengths, 0),
                            tf.zeros_like(log_norm), log_norm)
        return log_norm
Example #21
 def prediction(self):
     # Recurrent network.
     num_units = [self._num_RNN]
     cells = [nn.rnn_cell.GRUCell(n) for n in num_units]
     stacked_rnn = tf.contrib.rnn.MultiRNNCell(cells)
     output, _ = nn.dynamic_rnn(
         stacked_rnn,
         self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     ) 
     batch_size = tf.shape(output)[0]
     max_length = int(output.get_shape()[1])
     output_size = int(output.get_shape()[2])
     target_size = int(self.target.get_shape()[2])
     
     output_reshape = tf.reshape(output, [-1, output_size])         
     
     weight, bias = self._weight_and_bias(
         self._num_RNN, target_size)
     # Tanh layer.
     prediction = self.ext * tf.nn.tanh(tf.matmul(output_reshape, weight) + bias)
     prediction_reshape = tf.reshape(prediction, [batch_size, max_length, target_size])              
     return prediction_reshape
Example #22
 def forward(self, x, computation_mode=MakiRestorable.INFERENCE_MODE):
     if self._cell_type == CellType.BIDIR_DYNAMIC:
         (outputs_f, outputs_b), (states_f, states_b) = \
             bidirectional_dynamic_rnn(cell_fw=self._cells, cell_bw=self._cells, inputs=x, dtype=tf.float32)
         # Creation of the two MakiTensors for both `outputs_f` and `outputs_b` is inappropriate since
         # the algorithm that builds the computational graph does not consider such case and
         # therefore can not handle this situation, it will cause an error.
         self._cells_state = tf.concat([states_f, states_b], axis=-1)
         return tf.concat([outputs_f, outputs_b], axis=-1)
     elif self._cell_type == CellType.BIDIR_STATIC:
         x = tf.unstack(x, num=self._seq_length, axis=1)
         outputs_fb, states_f, states_b = \
             static_bidirectional_rnn(cell_fw=self._cells, cell_bw=self._cells, inputs=x, dtype=tf.float32)
         self._cells_state = tf.concat([states_f, states_b], axis=-1)
         return outputs_fb
     elif self._cell_type == CellType.DYNAMIC:
         outputs, states = dynamic_rnn(self._cells, x, dtype=tf.float32)
         self._cells_state = states
         return outputs
     elif self._cell_type == CellType.STATIC:
         x = tf.unstack(x, num=self._seq_length, axis=1)
         outputs, states = static_rnn(self._cells, x, dtype=tf.float32)
         self._cells_state = states
         return tf.stack(outputs, axis=1)
Example #23
    def __init__(self, is_training=True):
        self.max_grad_norm = 5

        self.learning_rate = 0.003
        self.unit_lstm = 30
        self.unit = 30
        self.drop_rate = 0.7
        self.batch_size = 191

        #1:383 3:245 6:101 7:74 9:62 10:44 11:138 12:67 13:87 14:108 15:166
        #1:191 3:122 6:202 7:148 9:124 10:88 11:138 12:67 13:87 14:108 15:166

        self.length = 100
        self.fea_dim = 9
        self.raw = 4
        self.num_sub = 3

        self.input = tf.placeholder(tf.float32,
                                    [None, self.length, self.fea_dim])
        self.sbp_label = tf.placeholder(tf.float32, [None, 1])
        self.dbp_label = tf.placeholder(tf.float32, [None, 1])
        self.domain = tf.placeholder(tf.int32, [None, self.num_sub])
        self.l = tf.placeholder(tf.float32, [])
        self.train = tf.placeholder(tf.bool, [])

        with tf.variable_scope('feature_extractor'):
            inputs = tf.transpose(self.input, [1, 0, 2])
            inner_cell = BasicLSTMCell(self.unit_lstm)
            outputs, final_state = dynamic_rnn(inner_cell,
                                               inputs,
                                               time_major=True,
                                               dtype=tf.float32)

            if is_training:
                keep_prob = tf.constant(self.drop_rate)
            else:
                keep_prob = tf.constant(1.0)
            outputs = tf.nn.dropout(outputs,
                                    keep_prob)  #keep_prob = tf.constant(1.0)

            #idx = tf.range(self.batch_size)*tf.shape(outputs)[1] + (self.seq_len - 1)
            #output = tf.gather(tf.reshape(outputs, [-1, self.unit]), idx)
            outputs = tf.transpose(outputs, [1, 0, 2])  #batch, seq, hidden
            output = tf.slice(outputs, [0, self.length - 1, 0],
                              [-1, 1, self.unit_lstm])
            output = tf.squeeze(output, axis=1)

            output = tf.layers.dense(output,
                                     self.unit,
                                     activation=tf.nn.relu,
                                     name='shared_dense')

        with tf.variable_scope('label_predictor'):
            sbp_dense1 = tf.layers.dense(output,
                                         self.unit,
                                         activation=tf.nn.relu,
                                         name='sbp_dense1')
            dbp_dense1 = tf.layers.dense(output,
                                         self.unit,
                                         activation=tf.nn.relu,
                                         name='dbp_dense1')

            sbp_dense = tf.layers.dense(sbp_dense1,
                                        self.unit,
                                        activation=tf.nn.relu,
                                        name='sbp_dense2')
            dbp_dense = tf.layers.dense(dbp_dense1,
                                        self.unit,
                                        activation=tf.nn.relu,
                                        name='dbp_dense2')

            sbp_dense = tf.layers.dense(sbp_dense,
                                        self.unit,
                                        activation=tf.nn.relu,
                                        name='sbp_dense3')
            dbp_dense = tf.layers.dense(dbp_dense,
                                        self.unit,
                                        activation=tf.nn.relu,
                                        name='dbp_dense3')

            #sbp_dense4 = tf.layers.dense(sbp_dense3, 70, activation=tf.nn.relu,  name='sbp_dense4')
            #dbp_dense4 = tf.layers.dense(dbp_dense3, 70, activation=tf.nn.relu,  name='dbp_dense4')

            #sbp_dense = tf.layers.dense(sbp_dense4, 70, activation=tf.nn.relu,  name='sbp_dense5')
            #dbp_dense = tf.layers.dense(dbp_dense4, 70, activation=tf.nn.relu,  name='dbp_dense5')

            self.sbp = tf.layers.dense(sbp_dense, 1, name='sbp_out')
            self.dbp = tf.layers.dense(dbp_dense, 1, name='dbp_out')
            loss1 = tf.losses.mean_squared_error(self.sbp_label, self.sbp)
            loss2 = tf.losses.mean_squared_error(self.dbp_label, self.dbp)
            self.pred_losses = loss1 + loss2

        with tf.variable_scope('domain_predictor'):
            feat = flip_gradient(output, self.l)
            dom = tf.layers.dense(feat,
                                  30,
                                  activation=tf.nn.relu,
                                  name='domain1')
            dom = tf.layers.dense(dom, self.num_sub, name='domain2')

            self.domain_pred = tf.nn.softmax(dom)
            # The cross-entropy op expects raw logits, not the softmaxed predictions.
            self.domain_losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=dom, labels=tf.cast(self.domain, tf.float32))
Example #24
    def __init__(self, **kwargs):
        '''The following arguments are accepted:

        Parameters
        ----------
        vocab_size  :   int
                        Size of the vocabulary for creating embeddings
        embedding_size      :   int
                                Dimensionality of the embedding space
        memory_size :   int
                        LSTM memory size
        keep_prob   :   float
                        Keep probability (1 - dropout rate) for embedding and LSTM
        subsequence_length  :   int
                                Length of the subsequences (all embeddings are padded to this
                                length)
        optimizer   :   OptimizerSpec
        '''
        ############################################################################################
        #                                 Get all hyperparameters                                  #
        ############################################################################################
        vocab_size = kwargs['vocab_size']
        embedding_size = kwargs['embedding_size']
        memory_size = kwargs['memory_size']
        keep_prob = kwargs['keep_prob']
        subsequence_length = kwargs['subsequence_length']
        optimizer_spec = kwargs['optimizer']
        optimizer = optimizer_spec.create()
        self.learning_rate = optimizer_spec.learning_rate
        self.step_counter = optimizer_spec.step_counter

        ############################################################################################
        #                                        Net inputs                                        #
        ############################################################################################
        self.batch_size = placeholder(tf.int32, shape=[], name='batch_size')
        self.is_training = placeholder(tf.bool, shape=[], name='is_training')
        self.word_ids = placeholder(tf.int32,
                                    shape=(None, subsequence_length),
                                    name='word_ids')
        self.labels = placeholder(tf.int32, shape=(None, ), name='labels')
        self.hidden_state = placeholder(tf.float32,
                                        shape=(None, memory_size),
                                        name='hidden_state')
        self.cell_state = placeholder(tf.float32,
                                      shape=(None, memory_size),
                                      name='cell_state')

        lengths = sequence_lengths(self.word_ids)

        ############################################################################################
        #                                        Embedding                                         #
        ############################################################################################
        self.embedding_matrix, _bias = get_weights_and_bias(
            (vocab_size, embedding_size))
        embeddings = cond(
            self.is_training, lambda: nn.dropout(nn.embedding_lookup(
                self.embedding_matrix, self.word_ids),
                                                 keep_prob=keep_prob),
            lambda: nn.embedding_lookup(self.embedding_matrix, self.word_ids))

        ############################################################################################
        #                                        LSTM layer                                        #
        ############################################################################################
        cell = BasicLSTMCell(memory_size, activation=tf.nn.tanh)

        # during inference, use entire ensemble
        keep_prob = cond(self.is_training, lambda: constant(keep_prob),
                         lambda: constant(1.0))
        cell = DropoutWrapper(cell, output_keep_prob=keep_prob)

        # what's the difference to just creating a zero-filled tensor tuple?
        self.zero_state = cell.zero_state(self.batch_size, tf.float32)
        state = LSTMStateTuple(c=self.cell_state, h=self.hidden_state)

        # A dynamic rnn creates the graph on the fly, so it can deal with embeddings of different
        # lengths. We do not need to unstack the embedding tensor to get rows, instead we compute
        # the actual sequence lengths and pass that
        # We are not sure how any of this works. Do we need to mask the cost function so the cell
        # outputs for _NOT_A_WORD_ inputs are ignored? Is the final cell state really relevant if it
        # was last updated with _NOT_A_WORD_ input? Does static_rnn absolve us of any of those
        # issues?
        outputs, self.state = nn.dynamic_rnn(cell,
                                             embeddings,
                                             sequence_length=lengths,
                                             initial_state=state)
        # Recreate tensor from list
        outputs = reshape(concat(outputs, 1),
                          [-1, subsequence_length * memory_size])
        self.outputs = reduce_mean(outputs)

        ############################################################################################
        #                        Fully connected layer, loss, and training                         #
        ############################################################################################
        ff1 = fully_connected(outputs, 2, with_activation=False, use_bias=True)
        loss = reduce_mean(
            nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                        logits=ff1))
        self.train_step = optimizer.minimize(loss,
                                             global_step=self.step_counter)
        self.predictions = nn.softmax(ff1)
        correct_prediction = equal(cast(argmax(self.predictions, 1), tf.int32),
                                   self.labels)
        self.accuracy = reduce_mean(cast(correct_prediction, tf.float32))

        ############################################################################################
        #                                     Create summaries                                     #
        ############################################################################################
        with tf.variable_scope('summary'):
            self.summary_loss = tf.summary.scalar('loss', loss)
            self.summary_accuracy = tf.summary.scalar('accuracy',
                                                      self.accuracy)
Example #25
def Build_Im2txt(kwargs):
        ''' IM2TXT'''
        with tf.name_scope('IM2TXT'):
            with Builder(**kwargs) as im2txt_builder:
                '''
                input_placeholder = tf.placeholder(tf.float32, \
                    shape=[None, kwargs['Image_width']*kwargs['Image_height']*kwargs['Image_cspace']], name='Input')
                #dropout_prob_placeholder = tf.placeholder(tf.float32, name='Dropout')
                #state_placeholder = tf.placeholder(tf.string, name="State")
                #input_reshape = im2txt_builder.Reshape_input(input_placeholder, width=kwargs['Image_width'], height=kwargs['Image_height'], colorspace= kwargs['Image_cspace'])
                #Redundant feature extractor already creates this placeholder
                '''
                if kwargs['State'] == 'Train':
                    input_seq_placeholder = tf.placeholder(tf.int32, shape=[None, kwargs['Padded_length']], name='Input_Seq')
                    target_seq_placeholder = tf.placeholder(tf.int32, shape=[None, kwargs['Padded_length']], name='Target_Seq')
                elif kwargs['State'] == 'Test':
                    input_seq_placeholder = tf.placeholder(tf.int32, shape=[None, 1], name='Input_Seq')
                    target_seq_placeholder = tf.placeholder(tf.int32, shape=[None, 1], name='Target_Seq')

                mask_placeholder = tf.placeholder(tf.int32, shape=[None, kwargs['Padded_length']], name='Seq_Mask')
                Lstm_state_placeholder = tf.placeholder(tf.float32, shape=[])

                '''
                TODO:
                Get input_seq, mask and target seq from reader
                Init inception-resnet correctly and attach input from reader to input_placeholder of inception-resnet
                Understand and build deploy state
                Separate implementation of loss and construction of network
                '''

                #reader will give input seq, mask and target seq
                #show tell init
                initializer = tf.random_uniform_initializer(minval=-0.08, maxval=0.08)


                #Building feature extractor
                Build_Inception_Resnet_v2a(kwargs)
                
                #Extracting necessary variables from feature extractor
                with tf.name_scope('Feature_Extractor'):
                    inception_output = tf.get_collection(kwargs['Model_name'] + '_Incepout')[0]
                    inception_state = tf.get_collection(kwargs['Model_name'] + '_State')[0]
                    inception_dropout = tf.get_collection(kwargs['Model_name'] + '_Dropout_prob_ph')[0]

                #Setting control params
                im2txt_builder.control_params(Dropout_control=inception_dropout, State=inception_state)

                #Image embeddings
                with tf.name_scope('Lstm_Embeddings'):
                    image_embeddings = im2txt_builder.FC_layer(inception_output, filters=512)
                    image_embeddings_size= tf.shape(image_embeddings)
                    #Seq embeddings
                    embeddings_map = tf.get_variable(name='Map', shape=[40, 512], initializer=initializer)
                    seq_embeddings = tf.nn.embedding_lookup(embeddings_map, input_seq_placeholder) 


                    lstm_cell = im2txt_builder.Lstm_cell_LayerNorm()


                    #lstm_cell = im2txt_builder.Lstm_cell();
                    #lstm_cell = im2txt_builder.Rnn_dropout(lstm_cell)
                    
                with tf.variable_scope("lstm") as lstm_scope:
                    zero_state = lstm_cell.zero_state(batch_size=image_embeddings_size[0], dtype=tf.float32)
                    _, initial_state = lstm_cell(image_embeddings, zero_state)

                    lstm_scope.reuse_variables()
                    if kwargs['State'] == 'Test':
                        state_feed = tf.placeholder(dtype=tf.float32, shape=[None, sum(lstm_cell.state_size)], name='State_feed')
                        state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1)
                        lstm_outputs, state_tuple = lstm_cell(inputs = tf.squeeze(seq_embeddings, axis=[1]), state=state_tuple)
                        concat_input = tf.concat(values=initial_state, axis=1)
                        concat_state = tf.concat(values=state_tuple, axis=1)

                    elif kwargs['State'] == 'Train':
                        sequence_length = tf.reduce_sum(mask_placeholder, 1) #Add sequence_mask 
                        lstm_outputs, _ = nn.dynamic_rnn(cell=lstm_cell, inputs=seq_embeddings, sequence_length=sequence_length, initial_state=initial_state, dtype=tf.float32, scope=lstm_scope)

                    with tf.name_scope('Lstm_output'):
                        lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])

                        logits = im2txt_builder.FC_layer(lstm_outputs, filters=40, readout=True)
                    #Target seq and losses next 
                    with tf.name_scope('Lstm_loss'):
                        if kwargs['State'] == 'Train':
                            targets = tf.reshape(target_seq_placeholder, [-1]) #flattening target seqs
                            weights = tf.to_float(tf.reshape(mask_placeholder, [-1]))

                            with tf.name_scope('Softmax_CE_loss'):
                                seq_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits)
                                batch_loss = tf.div(tf.reduce_sum(tf.multiply(seq_loss, weights)), tf.maximum(tf.reduce_sum(weights),1))


                    tf.add_to_collection(kwargs['Model_name'] + '_Input_seq_ph', input_seq_placeholder)
                    tf.add_to_collection(kwargs['Model_name'] + '_Output_ph', target_seq_placeholder)
                    tf.add_to_collection(kwargs['Model_name'] + '_Mask_ph', mask_placeholder)
                    tf.add_to_collection(kwargs['Model_name'] + '_Output', logits)

                    if kwargs['State'] == 'Test':
                        tf.add_to_collection(kwargs['Model_name'] + '_Initial_state', concat_input)
                        tf.add_to_collection(kwargs['Model_name'] + '_Lstm_state_feed', state_feed)
                        tf.add_to_collection(kwargs['Model_name'] + '_Lstm_state', concat_state)

                    elif kwargs['State'] == 'Train':
                        tf.add_to_collection(kwargs['Model_name'] + '_Loss', batch_loss)

                    #Test output next

                    return 'Sequence'
Example #26
    def fit(self,
            X,
            y,
            num_epochs,
            embedding_dims,
            V,
            K,
            hidden_dims,
            lr,
            beta1=0.95,
            beta2=0.95,
            batch_sz=32):
        X_train, X_valid, y_train, y_valid = train_test_split(X,
                                                              y,
                                                              test_size=0.25)
        len_t = max(len(x) for x in X_train)
        len_v = max(len(x) for x in X_valid)
        X_train = pad_sequences(X_train, len_t)
        y_train = pad_sequences(y_train, len_t)
        X_valid = pad_sequences(X_valid, len_v)
        y_valid = pad_sequences(y_valid, len_v)

        We = np.random.randn(V, embedding_dims)
        Wo = np.random.randn(hidden_dims, K)
        bo = np.zeros(K)
        N = X_train.shape[0]

        tf_X = tf.placeholder(tf.int32, (None, None))
        tf_y = tf.placeholder(tf.int32, (None, None))

        self.tf_X = tf_X
        self.tf_y = tf_y

        tf_We = tf.Variable(We, dtype=tf.float32)
        tf_Wo = tf.Variable(Wo, dtype=tf.float32)
        tf_bo = tf.Variable(bo, dtype=tf.float32)

        emb = tf.nn.embedding_lookup(tf_We, tf_X)
        cell = LSTMCell(hidden_dims, activation=tf.nn.relu)
        outputs, states = dynamic_rnn(cell, emb, dtype=tf.float32)
        outputs = tf.reshape(
            outputs, (tf.shape(tf_X)[0] * tf.shape(tf_X)[1], hidden_dims))

        logits = tf.matmul(outputs, tf_Wo) + tf_bo
        labels = tf.reshape(tf_y, [-1])
        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                           logits=logits))
        train_op = tf.train.AdamOptimizer(learning_rate=lr,
                                          beta1=beta1,
                                          beta2=beta2).minimize(cost)

        predictions = tf.argmax(logits, 1)
        predictions = tf.reshape(predictions,
                                 (tf.shape(tf_X)[0], tf.shape(tf_X)[1]))

        self.predictions = predictions

        num_batches = N // batch_sz
        t_costs = []
        v_costs = []
        init = tf.global_variables_initializer()
        self.session.run(init)

        for epoch in range(num_epochs):
            t0 = datetime.now()
            X_train, y_train = shuffle(X_train, y_train)
            t_cost = 0
            v_cost = 0
            for i in range(num_batches):
                X_batch = X_train[i * batch_sz:(i + 1) * batch_sz]
                y_batch = y_train[i * batch_sz:(i + 1) * batch_sz]
                len_b = max(len(x) for x in X_batch)
                X_batch = pad_sequences(X_batch, len_b)
                y_batch = pad_sequences(y_batch, len_b)
                self.session.run(train_op,
                                 feed_dict={
                                     tf_X: X_batch,
                                     tf_y: y_batch
                                 })
                if i % 100 == 0:
                    t_c, t_pred = self.session.run((cost, predictions),
                                                   feed_dict={
                                                       tf_X: X_train,
                                                       tf_y: y_train
                                                   })
                    t_cost += t_c
                    t_acc = accuracy(t_pred, y_train)
                    v_c, v_pred = self.session.run((cost, predictions),
                                                   feed_dict={
                                                       tf_X: X_valid,
                                                       tf_y: y_valid
                                                   })
                    v_cost += v_c
                    v_acc = accuracy(v_pred, y_valid)
                    print('train cost: %f, train accuracy: %f' %
                          (t_cost, t_acc))
                    print('valid cost: %f, valid accuracy: %f' %
                          (v_cost, v_acc))
            t_costs.append(t_cost)
            v_costs.append(v_cost)
            print('Epoch completed in %s' % (datetime.now() - t0))

        plt.plot(t_costs)
        plt.plot(v_costs)
        plt.show()
Example #27
    def construct(self):
        self.saved_session_name = os.path.join(self.tmp_folder, self.uuid_code)
        self.input_data = tf.placeholder(tf.float32,
                                         [None, None, self.input_dim])
        self.output_data = tf.placeholder(tf.float32,
                                          [None, None, self.output_dim])
        self.start_tokens = tf.placeholder(tf.float32, [None, self.output_dim])
        self.go_tokens = tf.placeholder(tf.float32, [None, 1, self.output_dim])
        self.sequence_length = tf.placeholder(tf.int32, [None])
        self.mask = tf.placeholder(tf.float32, [None, None])
        self.target_sequence_length = tf.placeholder(
            tf.int32, (None, ), name='target_sequence_length')
        self.max_target_sequence_length = tf.reduce_max(
            self.target_sequence_length, name='max_target_len')
        self.source_sequence_length = tf.placeholder(
            tf.int32, (None, ), name='source_sequence_length')
        self.x_stopping = np.full((self.stop_pad_length, self.input_dim),
                                  self.stop_pad_token,
                                  dtype=np.float32)
        self.y_stopping = np.full((self.stop_pad_length, self.output_dim),
                                  self.stop_pad_token,
                                  dtype=np.float32)
        self.learning_rate = tf.placeholder(tf.float32)
        self.batch_size = tf.placeholder(tf.float32)

        enc_cell = make_cell(self.layer_sizes, self.keep_prob)

        # We want to train the decoder to learn the stopping point as well,
        # so the sequence lengths is extended for both the decoder and the encoder
        # logic: the encoder will learn that the stopping token is the signal that the input is finished
        #        the decoder will learn to produce the stopping token to match the expected output
        #        the inferer will learn to produce the stopping token for us to recognise that and stop inferring
        self.source_sequence_length_padded = self.source_sequence_length + self.stop_pad_length
        self.target_sequence_length_padded = self.target_sequence_length + self.stop_pad_length
        max_target_sequence_length_padded = self.max_target_sequence_length + self.stop_pad_length

        _, self.enc_state = dynamic_rnn(
            enc_cell,
            self.input_data,
            sequence_length=self.source_sequence_length_padded,
            dtype=tf.float32,
            time_major=False,
            swap_memory=True)
        self.enc_state_centre = self.enc_state[-1]

        if self.symmetric:
            self.enc_state = self.enc_state[::-1]
            dec_cell = make_cell(self.layer_sizes[::-1], self.keep_prob)
        else:
            dec_cell = make_cell(self.layer_sizes, self.keep_prob)

        # 3. Dense layer to translate the decoder's output at each time
        # step into a choice from the target vocabulary
        projection_layer = tf.layers.Dense(
            units=self.output_dim,
            # kernel_initializer=tf.initializers.he_normal(),
            # kernel_regularizer=regularizer,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        # 4. Set up a training decoder and an inference decoder
        # Training Decoder
        with tf.variable_scope("decode"):
            # During PREDICT mode, the output data is none so we can't have a training model.
            # Helper for the training process. Used by BasicDecoder to read inputs.
            dec_input = tf.concat([self.go_tokens, self.output_data], 1)
            training_helper = TrainingHelper(
                inputs=dec_input,
                sequence_length=self.target_sequence_length_padded,
                time_major=False)

            # Basic decoder
            training_decoder = BasicDecoder(dec_cell, training_helper,
                                            self.enc_state, projection_layer)

            # Perform dynamic decoding using the decoder
            self.training_decoder_output\
                = dynamic_decode(training_decoder,
                                 # True because we're using variable length sequences, which have finish points
                                 impute_finished=True,
                                 maximum_iterations=max_target_sequence_length_padded)[0]
        # 5. Inference Decoder
        # Reuses the same parameters trained by the training process
        with tf.variable_scope("decode", reuse=True):

            def end_fn(time_step_value):
                # Ideally, the inferer should produce the stopping token
                # Which can be assessed as being equal to the modelled stop token, and this should be return:
                # return tf.reduce_all(tf.equal(time_step_value, self.y_stopping))

                # However due to the nature of training, the produced stop token will never be exactly the same
                # as the modelled one. If we use an embedded layer, then this top token can be learned
                # however as we are not using the embedded layer, this function should return False
                # meaning there is no early stop
                return False

            inference_helper = InferenceHelper(sample_fn=lambda x: x,
                                               sample_shape=[self.output_dim],
                                               sample_dtype=dtypes.float32,
                                               start_inputs=self.start_tokens,
                                               end_fn=end_fn)

            # Basic decoder
            inference_decoder = BasicDecoder(dec_cell, inference_helper,
                                             self.enc_state, projection_layer)

            # Perform dynamic decoding using the decoder
            self.inference_decoder_output = dynamic_decode(
                inference_decoder,
                # True because we're using variable length sequences, which have finish points
                impute_finished=True,
                maximum_iterations=max_target_sequence_length_padded)[0]
Example #28
# tf.reset_default_graph()
encode_input = tf.placeholder(shape=[None, None],
                              dtype=tf.int32,
                              name='encode_input')
decode_target = tf.placeholder(shape=[None, None],
                               dtype=tf.int32,
                               name='decode_target')
decode_input = tf.placeholder(shape=[None, None],
                              dtype=tf.int32,
                              name='decode_input')
embedding = tf.Variable(tf.random_uniform([4, 10], -1.0, 1.0),
                        dtype=tf.float32)  # embedding table: first dim is vocab size, second is embedding size
encode_embedding = tf.nn.embedding_lookup(embedding, encode_input)
decode_embedding = tf.nn.embedding_lookup(embedding, decode_input)
lstm_cell = LSTMCell(4)
outputs, states = dynamic_rnn(lstm_cell, encode_embedding, dtype=tf.float32)
print('states is ', states)
# y=tf.unstack(y,4,1)/
lstm_cell2 = LSTMCell(num_units=4)
logit, states2 = dynamic_rnn(lstm_cell2,
                             decode_embedding,
                             dtype=tf.float32,
                             initial_state=states,
                             scope='decode_output')

print('2')
la = tf.one_hot(y_target, depth=4, dtype=tf.float32)
print(la)
pre = tf.nn.softmax(logit)
print('logit is ', logit)
print('pre is ', pre)
Example #29
    def _build_seq_graph(self):
        """The main function to create sli_rec model.
        
        Returns:
            obj:the output of sli_rec section.
        """
        hparams = self.hparams
        with tf.variable_scope("sli_rec"):
            hist_input = tf.concat(
                [self.item_history_embedding, self.cate_history_embedding], 2)
            self.mask = self.iterator.mask
            self.sequence_length = tf.reduce_sum(self.mask, 1)

            with tf.variable_scope("long_term_asvd"):
                att_outputs1 = self._attention(hist_input,
                                               hparams.attention_size)
                att_fea1 = tf.reduce_sum(att_outputs1, 1)
                tf.summary.histogram("att_fea1", att_fea1)

            item_history_embedding_new = tf.concat(
                [
                    self.item_history_embedding,
                    tf.expand_dims(self.iterator.time_from_first_action, -1),
                ],
                -1,
            )
            item_history_embedding_new = tf.concat(
                [
                    item_history_embedding_new,
                    tf.expand_dims(self.iterator.time_to_now, -1),
                ],
                -1,
            )
            with tf.variable_scope("rnn"):
                rnn_outputs, final_state = dynamic_rnn(
                    Time4LSTMCell(hparams.hidden_size),
                    inputs=item_history_embedding_new,
                    sequence_length=self.sequence_length,
                    dtype=tf.float32,
                    scope="time4lstm",
                )
                tf.summary.histogram("LSTM_outputs", rnn_outputs)

            with tf.variable_scope("attention_fcn"):
                att_outputs2 = self._attention_fcn(self.target_item_embedding,
                                                   rnn_outputs)
                att_fea2 = tf.reduce_sum(att_outputs2, 1)
                tf.summary.histogram("att_fea2", att_fea2)

            # ensemble
            with tf.name_scope("alpha"):
                concat_all = tf.concat(
                    [
                        self.target_item_embedding,
                        att_fea1,
                        att_fea2,
                        tf.expand_dims(self.iterator.time_to_now[:, -1], -1),
                    ],
                    1,
                )
                last_hidden_nn_layer = concat_all
                alpha_logit = self._fcn_net(last_hidden_nn_layer,
                                            hparams.att_fcn_layer_sizes,
                                            scope="fcn_alpha")
                alpha_output = tf.sigmoid(alpha_logit)
                user_embed = att_fea1 * alpha_output + att_fea2 * (
                    1.0 - alpha_output)
            model_output = tf.concat([user_embed, self.target_item_embedding],
                                     1)
            tf.summary.histogram("model_output", model_output)
            return model_output