def exe_encoder(self, enc, enc_weather):
    """Encode the input sequence, optionally fused with a weather sequence.

    The input goes through the CNN helper, is flattened and reshaped to
    (batch_size, encoder_length, features), and is then run through the
    recurrent helper followed by softmax attention.

    Args:
        enc: input tensor handed to ``rnn_utils.get_cnn_rep``.
        enc_weather: weather sequence; only read when ``self.use_attention``
            is truthy.

    Returns:
        A tuple ``(enc_outputs, fn_state, enc_weather_hidden)`` where
        ``enc_weather_hidden`` is ``None`` when ``self.use_attention`` is
        falsy.
    """
    with tf.variable_scope("encoder", initializer=self.initializer):
        # CNN layers; batch norm is always enabled in the generator encoder.
        features = tf.layers.flatten(
            rnn_utils.get_cnn_rep(enc, mtype=self.mtype,
                                  use_batch_norm=True,
                                  dropout=self.dropout))
        feature_dim = int(features.get_shape()[-1])
        enc_data = tf.reshape(
            features, [self.batch_size, self.encoder_length, feature_dim])

        enc_weather_hidden = None
        if self.use_attention:
            # With weather attention, the per-step weather outputs are
            # concatenated onto the pollutant features.
            with tf.variable_scope("weather_encoder",
                                   initializer=self.initializer):
                enc_weather_output, _ = rnn_utils.execute_sequence(
                    enc_weather, self.e_params)
                # Attention summary of the encoder-side weather sequence.
                enc_weather_hidden = rnn_utils.get_softmax_attention(
                    enc_weather_output)
                enc_data = tf.concat([enc_data, enc_weather_output], axis=2)

        # Push the (possibly augmented) sequence through the LSTM.
        enc_outputs, fn_state = rnn_utils.execute_sequence(
            enc_data, self.e_params)
        enc_outputs = rnn_utils.get_softmax_attention(enc_outputs)
    return enc_outputs, fn_state, enc_weather_hidden
def exe_encoder(self, enc, use_batch_norm=None, dropout=None):
    """Encode ``enc`` with an optional CNN front-end followed by an RNN.

    Args:
        enc: input tensor.
        use_batch_norm: batch-norm flag for the CNN helper. ``None`` means
            "use ``self.use_batch_norm``".
        dropout: dropout setting for the CNN helper. Any falsy value falls
            back to ``self.dropout`` (unchanged from the original code).

    Returns:
        A tuple ``(fn_state, enc_output)``. When ``self.rnn_layers > 1`` only
        the last element of the state returned by the recurrent helper is
        kept.
    """
    if not dropout:
        dropout = self.dropout
    # The original assigned the falsy argument to self.use_batch_norm, which
    # clobbered the instance setting and still passed None to the CNN.
    # Fall back to the instance setting instead and leave self untouched.
    if use_batch_norm is None:
        use_batch_norm = self.use_batch_norm
    with tf.variable_scope("encoder", initializer=self.initializer):
        if self.dtype == "grid":
            if self.use_cnn:
                # add one cnn layer here
                print(enc.get_shape())
                cnn = rnn_utils.get_cnn_rep(enc, mtype=self.mtype,
                                            use_batch_norm=use_batch_norm,
                                            dropout=dropout)
            else:
                cnn = enc
            cnn = tf.layers.flatten(cnn)
            last_dim = int(cnn.get_shape()[-1])
            enc_data = tf.reshape(
                cnn, [self.batch_size, self.encoder_length, last_dim])
        else:
            # Non-grid input is averaged over axis 1 before the RNN.
            enc_data = tf.reduce_mean(enc, axis=1)
        # then push through lstm
        enc_output, fn_state = rnn_utils.execute_sequence(
            enc_data, self.e_params)
        if self.rnn_layers > 1:
            fn_state = fn_state[-1]
    return fn_state, enc_output
def create_generator(self, enc, dec, att):
    """Build the generator graph, optionally conditioned on attention input.

    Args:
        enc: encoder input, resolved through ``self.lookup_input``.
        dec: decoder input, resolved through ``self.lookup_input``.
        att: attention input; only used when ``self.use_attention`` is
            truthy.

    Returns:
        A tuple ``(outputs, conditional_vectors)``; ``conditional_vectors``
        is ``None`` when ``self.use_attention`` is falsy.
    """
    # shape: batch_size x encoder_length x 1024
    enc, dec, att = self.lookup_input(enc, dec, att)
    # The initializer must be passed by keyword: the second positional
    # parameter of tf.variable_scope is default_name, so the original call
    # never applied self.initializer to this scope.
    with tf.variable_scope("generator", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        enc_outputs = self.exe_encoder(enc)
        # Because transportation is a local problem, weather data does not
        # need to be converted to a grid heatmap. A simple LSTM/GRU models
        # the fluctuation of the weather features => an attentional vector.
        if self.use_attention:
            # presumably 25 locations x 9 weather features — TODO confirm
            att = tf.reshape(
                tf.transpose(att, [0, 2, 1, 3]),
                [pr.batch_size * 25, self.attention_length, 9])
            att_outputs, _ = rnn_utils.execute_sequence(att, self.e_params)
            att_outputs = self.get_softmax_attention(att_outputs)
            att_outputs = tf.reshape(att_outputs, [pr.batch_size, 25, 128])
            att_outputs = tf.layers.dense(att_outputs, 32,
                                          activation=tf.nn.tanh,
                                          name="attention_weathers")
            att_outputs = tf.layers.flatten(att_outputs)
            conditional_vectors = self.add_conditional_layer(
                enc_outputs, att_outputs)
            outputs = self.exe_decoder(conditional_vectors)
        else:
            outputs = self.exe_decoder(enc_outputs)
            conditional_vectors = None
    return outputs, conditional_vectors
def create_generator(self, enc, dec):
    """Build the generator graph with optional future-weather conditioning.

    Args:
        enc: encoder input, resolved through ``self.lookup_input``.
        dec: decoder input, resolved through ``self.lookup_input``.

    Returns:
        The ``(outputs, classes)`` pair returned by ``self.exe_decoder``.
    """
    # The initializer must be passed by keyword: the second positional
    # parameter of tf.variable_scope is default_name, so the original calls
    # never applied self.initializer to these scopes.
    with tf.variable_scope("generator", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        # shape: batch_size x decoder_length x grid_size x grid_size
        enc, enc_w, dec_w = self.lookup_input(enc, dec)
        en_hidden_vector, fn_state, enc_w_h = self.exe_encoder(enc, enc_w)
        # use future weather forecast for decoder
        if self.use_attention:
            with tf.variable_scope("future_forecast",
                                   initializer=self.initializer,
                                   reuse=tf.AUTO_REUSE):
                forecast_outputs, _ = rnn_utils.execute_sequence(
                    dec_w, self.e_params)
                forecast_attention = rnn_utils.get_softmax_attention(
                    forecast_outputs)
            # Concatenate encoder, encoder-weather and forecast summaries.
            weather_hidden_vector = tf.concat(
                [en_hidden_vector, enc_w_h, forecast_attention], axis=1)
            outputs, classes = self.exe_decoder(
                forecast_outputs, fn_state, weather_hidden_vector, enc)
        else:
            outputs, classes = self.exe_decoder(
                None, fn_state, en_hidden_vector, enc)
    return outputs, classes
def create_generator(self, enc, dec, att):
    """Build the generator graph, always conditioned on the attention input.

    Args:
        enc: encoder input, resolved through ``self.lookup_input``.
        dec: decoder input, resolved through ``self.lookup_input``.
        att: attention input, reshaped and summarised before conditioning.

    Returns:
        A tuple ``(outputs, conditional_vectors)``.
    """
    # shape: batch_size x encoder_length x 1024
    enc, dec, att = self.lookup_input(enc, dec, att)
    # The initializer must be passed by keyword: the second positional
    # parameter of tf.variable_scope is default_name, so the original call
    # never applied self.initializer to this scope.
    with tf.variable_scope("generator", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        enc_outputs = self.exe_encoder(enc)
        # presumably 25 locations x 9 weather features — TODO confirm
        att = tf.reshape(tf.transpose(att, [0, 2, 1, 3]),
                         [pr.batch_size * 25, self.attention_length, 9])
        att_outputs, _ = rnn_utils.execute_sequence(att, self.e_params)
        att_outputs = self.get_softmax_attention(att_outputs)
        att_outputs = tf.reshape(att_outputs, [pr.batch_size, 25, 128])
        att_outputs = tf.layers.dense(att_outputs, 32,
                                      activation=tf.nn.tanh,
                                      name="attention_weathers")
        att_outputs = tf.layers.flatten(att_outputs)
        conditional_vectors = self.add_conditional_layer(enc_outputs,
                                                         att_outputs)
        outputs = self.exe_decoder(conditional_vectors)
    return outputs, conditional_vectors
def exe_encoder(self, enc):
    """Encode ``enc`` with the MSF networks, an RNN and softmax attention.

    Args:
        enc: input tensor handed to ``self.add_msf_networks``.

    Returns:
        The result of ``self.get_softmax_attention`` applied to the RNN
        outputs.
    """
    # Work on a private copy so the shared encoder params stay untouched.
    lstm_params = copy.deepcopy(self.e_params)
    lstm_params["fw_cell_size"] = 256
    with tf.variable_scope("encoder", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        msf_features = self.add_msf_networks(enc)
        sequence = tf.reshape(
            msf_features,
            shape=(pr.batch_size, self.encoder_length, 256))
        # go to lstm
        encoded, _ = rnn_utils.execute_sequence(sequence, lstm_params)
        attended = self.get_softmax_attention(encoded)
    return attended
def get_attention_rep(self, inputs, attention_length, lstm_cell, hidden_size):
    """Summarise a sequence as an attention-weighted sum of RNN outputs.

    Args:
        inputs: input sequence handed to ``rnn_utils.execute_sequence``.
        attention_length: unused here; kept for interface compatibility.
        lstm_cell: value stored under ``"fw_cell"`` in the RNN params.
        hidden_size: value stored under ``"fw_cell_size"`` in the RNN params.

    Returns:
        The RNN outputs multiplied by their softmax attention weights and
        summed over axis 1.
    """
    with tf.variable_scope("attention_rep", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        params = {
            "fw_cell": lstm_cell,
            "fw_cell_size": hidden_size
        }
        outputs, _ = rnn_utils.execute_sequence(inputs, params)
        logits = tf.layers.dense(outputs, units=1, activation=None,
                                 name="attention_logits")
        # Squeeze only the trailing unit axis; an unqualified squeeze would
        # also drop a size-1 batch (or time) dimension.
        attention_logits = tf.squeeze(logits, axis=-1)
        attention = tf.nn.softmax(attention_logits)
        # Broadcast the per-step weights over the feature axis, then sum
        # over axis 1 (same result as the transpose/multiply/transpose form).
        weighted = tf.multiply(outputs, tf.expand_dims(attention, -1))
        outputs = tf.reduce_sum(weighted, axis=1)
    return outputs
def get_attention_rep(self, inputs):
    """Run ``inputs`` through an RNN and return its softmax-attention summary.

    Args:
        inputs: tensor whose static shape is set to (batch_size,
            attention_length, atttention_hidden_size) before the RNN runs.

    Returns:
        The result of ``self.get_softmax_attention`` on the RNN outputs.
    """
    with tf.variable_scope("attention_rep", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        # Reuse the encoder's cell type, direction and depth, but take the
        # hidden width from self.rnn_hidden_units.
        rnn_params = dict()
        rnn_params["fw_cell"] = self.e_params["fw_cell"]
        rnn_params["fw_cell_size"] = self.rnn_hidden_units
        rnn_params["direction"] = self.e_params["direction"]
        rnn_params["rnn_layer"] = self.e_params["rnn_layer"]

        static_shape = (self.batch_size, self.attention_length,
                        self.atttention_hidden_size)
        inputs.set_shape(static_shape)

        rnn_outputs, _ = rnn_utils.execute_sequence(inputs, rnn_params)
        attended = self.get_softmax_attention(rnn_outputs)
    return attended
def exe_decoder(self, dec_hidden_vectors):
    """Decode conditional vectors into a sequence of flattened grids.

    The conditional vector is repeated for every decoder step, concatenated
    with ``self.z``, projected through a dense layer, run through the RNN
    helper, and finally through the CNN helper.

    Args:
        dec_hidden_vectors: conditional vectors; the reshape below requires
            shape (batch_size, rnn_hidden_units).

    Returns:
        Tensor reshaped to (batch_size, decoder_length,
        grid_size * grid_size), with tanh applied.
    """
    params = copy.deepcopy(self.e_params)
    params["fw_cell_size"] = 256
    with tf.variable_scope("decoder", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        # tf.tile lays the copies out back to back, so reshaping directly to
        # [bs, decoder_length, hidden] gives every step the same vector. The
        # original reshape to [bs, hidden, decoder_length] plus a transpose
        # scrambled the features across steps.
        dec_inputs_vectors = tf.tile(dec_hidden_vectors,
                                     [1, self.decoder_length])
        dec_inputs_vectors = tf.reshape(
            dec_inputs_vectors,
            [pr.batch_size, self.decoder_length, self.rnn_hidden_units])
        # dec_inputs_vectors with shape bs x 24 x 256: concatenation of
        # conditional layer vector & uniform random 128D
        dec_inputs_vectors = tf.concat([dec_inputs_vectors, self.z], axis=2)
        dec_concat_vectors = tf.layers.dense(dec_inputs_vectors, 256,
                                             name="generation_hidden_seed",
                                             activation=tf.nn.tanh)
        dec_outputs, _ = rnn_utils.execute_sequence(dec_concat_vectors,
                                                    params)
        # Fold time into the batch axis (bs*24 x 256, viewed as 4x4x16) so
        # the CNN runs once over all steps.
        dec_outputs = tf.reshape(
            dec_outputs, [pr.batch_size * self.decoder_length, 4, 4, 16])
        generate_outputs = rnn_utils.get_cnn_rep(dec_outputs, 2, tf.nn.relu,
                                                 8, self.use_batch_norm,
                                                 self.dropout, False)
        generate_outputs = tf.tanh(tf.layers.flatten(generate_outputs))
        outputs = tf.reshape(
            generate_outputs,
            [pr.batch_size, self.decoder_length,
             pr.grid_size * pr.grid_size])
    return outputs
def add_conditional_layer(self, dec, enc_outputs, attention=None):
    """Combine encoder, forecast and extra attention vectors into one vector.

    Args:
        dec: decoder-side input; only used when ``self.use_weather`` is
            truthy.
        enc_outputs: encoder outputs; attention-pooled first when
            ``self.use_encoder`` is truthy.
        attention: optional extra vector concatenated on axis 1.

    Returns:
        The combined vector. It is projected to 128 units by a tanh dense
        layer unless its last dimension is already 128, in which case it is
        returned unchanged.
    """
    if self.use_encoder:
        with tf.variable_scope("encoder_attention",
                               initializer=self.initializer):
            enc_outputs = self.get_softmax_attention(enc_outputs)

    # Attention layers measure the importance of each timestep
    # (past hidden, future forecast, china).
    with tf.variable_scope("conditional", initializer=self.initializer):
        if not self.use_weather:
            hidden_input = enc_outputs
        else:
            # use weather to validate impact factor to seoul weather
            cnn_dec_input = tf.layers.flatten(
                rnn_utils.get_cnn_rep(dec, mtype=self.mtype,
                                      use_batch_norm=self.use_batch_norm,
                                      dropout=self.dropout))
            forecast_dim = int(cnn_dec_input.get_shape()[-1])
            dec_data = tf.reshape(
                cnn_dec_input,
                [self.batch_size, self.decoder_length, forecast_dim])
            dec_rep, _ = rnn_utils.execute_sequence(dec_data, self.e_params)
            # this is the forecast weather vector
            hidden_input = self.get_softmax_attention(dec_rep)
            if self.use_encoder:
                # concat encoder output and weather forecast
                hidden_input = tf.concat([enc_outputs, hidden_input], axis=1)

        if attention is not None:
            if hidden_input is not None:
                # concatenate with the china factor
                hidden_input = tf.concat([hidden_input, attention], axis=1)
            else:
                hidden_input = attention

        # The last dimension shows whether anything was concatenated.
        # If hidden_input is still None at this point the call below fails.
        hidden_input_shape = hidden_input.get_shape()
        if hidden_input_shape[-1] != 128:
            # dec_hidden_vectors with shape bs x 128
            dec_hidden_vectors = tf.layers.dense(hidden_input, 128,
                                                 name="conditional_layer",
                                                 activation=tf.nn.tanh)
            if self.dropout:
                dec_hidden_vectors = tf.nn.dropout(dec_hidden_vectors, 0.5)
        else:
            dec_hidden_vectors = hidden_input
    return dec_hidden_vectors
def add_conditional_layer(self, dec, enc_outputs, attention=None):
    """Fuse encoder and decoder-side summaries into a 128-unit vector.

    Args:
        dec: decoder-side input handed to the CNN helper.
        enc_outputs: encoder outputs, attention-pooled inside this method.
        attention: optional extra vector concatenated on axis 1.

    Returns:
        Output of a 128-unit tanh dense layer, with ``tf.nn.dropout`` (0.5)
        applied when ``self.dropout`` is truthy.
    """
    with tf.name_scope("conditional"):
        forecast_features = tf.layers.flatten(
            rnn_utils.get_cnn_rep(dec, mtype=self.mtype,
                                  use_batch_norm=self.use_batch_norm,
                                  dropout=self.dropout))
        forecast_dim = int(forecast_features.get_shape()[-1])
        dec_data = tf.reshape(
            forecast_features,
            [self.batch_size, self.decoder_length, forecast_dim])
        dec_rep, _ = rnn_utils.execute_sequence(dec_data, self.e_params)
        dec_rep = self.get_softmax_attention(dec_rep)

        # Attention layer measures the importance of each timestep.
        enc_outputs = self.get_softmax_attention(enc_outputs)

        # dec_input with shape bs x 3hidden_size
        dec_input = tf.concat([enc_outputs, dec_rep], axis=1)
        if attention is not None:
            dec_input = tf.concat([dec_input, attention], axis=1)

        # dec_hidden_vectors with shape bs x 128
        dec_hidden_vectors = tf.layers.dense(dec_input, 128,
                                             name="conditional_layer",
                                             activation=tf.nn.tanh)
        if self.dropout:
            dec_hidden_vectors = tf.nn.dropout(dec_hidden_vectors, 0.5)
    return dec_hidden_vectors
def inference(self):
    """Build the per-station prediction graph.

    Each of the 25 stations is encoded by an RNN plus a single dense net.
    Every station then attends over the other 24 stations (plus 24 tiled
    copies of itself) and predicts ``self.decoder_length`` sigmoid values.

    Returns:
        Predictions stacked over stations and transposed to
        (batch, decoder_length, 25).
    """
    enc = self.lookup_input()
    with tf.variable_scope("encoder", initializer=self.initializer,
                           reuse=tf.AUTO_REUSE):
        # feed data stations to a single net then concat with lstm layers
        # feed outputs to double net
        enc = tf.reshape(
            enc,
            shape=(pr.batch_size * 25, self.encoder_length,
                   self.encoder_vector_size))
        _, enc_lstm = rnn_utils.execute_sequence(enc, self.params)
        # b x 25 x 24
        enc_nn = self.add_single_net(tf.layers.flatten(enc))
        enc_combined = tf.concat([enc_lstm[-1], enc_nn], axis=1)
        enc_combined_s = enc_combined.get_shape()
        enc_combined = tf.reshape(
            enc_combined, shape=(pr.batch_size, 25, enc_combined_s[-1]))
        enc_outputs = self.add_upper_net(enc_combined)
        # concat station rep vectors with others' attention vectors
        outputs = []
        for x in range(25):
            # Build the "all other stations" index list without deleting
            # from a range object, which only works on Python 2.
            indices = [i for i in range(25) if i != x]
            others = tf.gather(enc_outputs, indices, axis=1)
            # Squeeze only the gathered station axis so a batch of size 1
            # keeps its batch dimension.
            current = tf.squeeze(tf.gather(enc_outputs, [x], axis=1), axis=1)
            current_ = tf.reshape(tf.tile(current, [1, 24]),
                                  shape=(pr.batch_size, 24, 200))
            others = tf.concat([others, current_], axis=1)
            with tf.name_scope("attention_score_%i" % x):
                attention_score = tf.layers.dense(others, units=1,
                                                  name="softmax_score")
                attention_score = tf.nn.softmax(
                    tf.squeeze(attention_score, axis=-1),
                    name="softmax_prob")
                # Move features to the front so the per-row weights
                # broadcast, then restore the layout and sum over rows.
                others = tf.transpose(others, [2, 0, 1])
                attention_vectors = tf.multiply(others, attention_score)
                attention_vectors = tf.transpose(attention_vectors,
                                                 [1, 2, 0])
                attention_vectors = tf.reduce_sum(attention_vectors, axis=1)
                current = tf.concat([current, attention_vectors], axis=1)
            with tf.name_scope("prediction_%i" % x):
                pred = tf.layers.dense(current, units=self.decoder_length,
                                       activation=tf.nn.sigmoid,
                                       name="predictions")
                pred = tf.layers.dropout(pred, self.dropout_placeholder)
            # Later stations reuse the variables created for the first one.
            tf.get_variable_scope().reuse_variables()
            outputs.append(pred)
        outputs = tf.stack(outputs, axis=1)
        outputs = tf.transpose(outputs, [0, 2, 1])
        print(outputs)
    return outputs
def validate_output(self, inputs, conditional_vectors, is_fake=False):
    """Score a sequence of grids as real or fake, with optional rewards.

    Args:
        inputs: grids, reshaped to (batch_size * decoder_length, grid_size,
            grid_size, 1).
        conditional_vectors: conditioning vectors, reshaped to
            (batch_size * decoder_length, rnn_hidden_units).
        is_fake: when true, also compute per-step discounted rewards.

    Returns:
        A tuple ``(output, rewards)``. ``output`` is the flattened result of
        a 1-unit dense layer over the RNN outputs. ``rewards`` is ``None``
        unless ``is_fake`` is true, in which case it is a list of
        ``decoder_length`` tensors where
        ``rewards[i] = sum_{j >= i} gamma ** (j - i) * log_sigmoid(output)[:, j]``.
    """
    conditional_vectors = tf.reshape(
        conditional_vectors,
        [pr.batch_size * self.decoder_length, self.rnn_hidden_units])
    inputs = tf.reshape(
        inputs,
        [pr.batch_size * self.decoder_length, pr.grid_size, pr.grid_size, 1])
    inputs_rep = rnn_utils.get_cnn_rep(inputs, 3, tf.nn.leaky_relu, 8,
                                       self.use_batch_norm, self.dropout,
                                       False)
    inputs_rep = tf.layers.flatten(inputs_rep)
    inputs_rep = tf.concat([inputs_rep, conditional_vectors], axis=1)
    inputs_rep_shape = inputs_rep.get_shape()
    inputs_rep = tf.reshape(
        inputs_rep,
        [pr.batch_size, self.decoder_length, int(inputs_rep_shape[-1])])
    # push through a GRU layer
    rnn_outputs, _ = rnn_utils.execute_sequence(inputs_rep, self.e_params)
    # real or fake value
    output = tf.layers.dense(rnn_outputs, 1, name="validation_value")
    output = tf.layers.flatten(output)
    rewards = None
    if is_fake:
        rewards = [None] * self.decoder_length
        pred_value = tf.log_sigmoid(output)
        pred_values = tf.unstack(pred_value, axis=1)
        # Discounted return, accumulated from the last step backwards:
        # R_i = v_i + gamma * R_{i+1}. The original inner loop multiplied
        # rewards[i] by itself and never added the later steps' values.
        running = None
        for i in reversed(range(self.decoder_length)):
            if running is None:
                running = pred_values[i]
            else:
                running = pred_values[i] + self.gamma * running
            rewards[i] = running
    return output, rewards