def __init__(self, input_dim, adj_mat, max_diffusion_step, num_nodes,
             hid_dim, output_dim, num_rnn_layers):
    """Create the stack of DCGRU cells used by the decoder.

    The stack always starts with a cell mapping ``input_dim`` features to
    ``hid_dim`` units and always ends with a cell built with
    ``num_proj=output_dim``; ``num_rnn_layers - 2`` hidden cells sit in
    between. The stack therefore holds at least two cells, even when
    ``num_rnn_layers`` is 1.

    Every hidden layer receives its own ``DCGRUCell`` instance. Registering
    one instance several times in an ``nn.ModuleList`` would make those
    layers share a single set of parameters.

    Args:
        input_dim: feature size consumed by the first cell.
        adj_mat: adjacency matrix, forwarded unchanged to every cell.
        max_diffusion_step: forwarded unchanged to every cell.
        num_nodes: number of graph nodes, forwarded to every cell.
        hid_dim: ``num_units`` of every cell and input size of all cells
            after the first.
        output_dim: ``num_proj`` of the last cell.
        num_rnn_layers: requested depth of the stack.
    """
    super(DCGRUDecoder, self).__init__()
    self.hid_dim = hid_dim
    self._num_nodes = num_nodes  # 207 according to the original note
    self._output_dim = output_dim  # should be 1 according to the original note
    self._num_rnn_layers = num_rnn_layers

    def make_cell(cell_input_dim, **extra):
        # All cells share the graph settings; only the input size and the
        # optional projection differ.
        return DCGRUCell(input_dim=cell_input_dim, num_units=hid_dim,
                         adj_mat=adj_mat,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes, **extra)

    # First layer of the decoder.
    decoding_cells = [make_cell(input_dim)]
    # Hidden layers: a fresh cell per layer so parameters are not tied.
    decoding_cells.extend(
        make_cell(hid_dim) for _ in range(1, num_rnn_layers - 1))
    # Last layer projects to output_dim.
    decoding_cells.append(make_cell(hid_dim, num_proj=output_dim))
    self.decoding_cells = nn.ModuleList(decoding_cells)
def __init__(self, input_dim, adj_mat, max_diffusion_step, hid_dim,
             num_nodes, num_rnn_layers):
    """Create the stack of DCGRU cells used by the encoder.

    The first cell consumes ``input_dim`` features; each of the remaining
    ``num_rnn_layers - 1`` cells consumes the ``hid_dim`` units produced by
    the layer below. Every layer is a separate ``DCGRUCell`` instance.

    Args:
        input_dim: feature size consumed by the first cell.
        adj_mat: adjacency matrix, forwarded unchanged to every cell.
        max_diffusion_step: forwarded unchanged to every cell.
        hid_dim: ``num_units`` of every cell.
        num_nodes: number of graph nodes, forwarded to every cell.
        num_rnn_layers: number of stacked cells (at least one is built).
    """
    super(DCRNNEncoder, self).__init__()
    self.hid_dim = hid_dim
    self._num_rnn_layers = num_rnn_layers

    shared_kwargs = dict(num_units=hid_dim, adj_mat=adj_mat,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes)
    # Only the first layer sees the raw input width.
    layer_input_dims = [input_dim] + [hid_dim] * (num_rnn_layers - 1)
    self.encoding_cells = nn.ModuleList(
        [DCGRUCell(input_dim=width, **shared_kwargs)
         for width in layer_input_dims])
def __init__(self, is_training, config, scaler=None, adj_mx=None):
    """Build the TensorFlow graph of the DCRNN encoder-decoder model.

    Creates the ``inputs``/``labels`` placeholders, an encoder and a decoder
    ``MultiRNNCell`` made of ``DCGRUCell`` objects, the prediction tensor
    ``self._outputs``, the masked MAE metric, the configured loss and, when
    training, a gradient-clipped Adam train op.

    Args:
        is_training: selects the decoder feeding strategy and whether a
            train op is created.
        config: mapping of hyper-parameters. ``batch_size``, ``rnn_units``
            and ``seq_len`` have no default here.
        scaler: forwarded to the base-class constructor.
        adj_mx: adjacency matrix handed to every ``DCGRUCell``.

    Raises:
        AssertionError: if ``input_dim`` differs from ``output_dim``.
    """
    super(DCRNNModel, self).__init__(config, scaler=scaler)

    # Hyper-parameters are read in a fixed order.
    batch_size = int(config.get('batch_size'))
    max_diffusion_step = int(config.get('max_diffusion_step', 2))
    cl_decay_steps = int(config.get('cl_decay_steps', 1000))
    filter_type = config.get('filter_type', 'laplacian')
    horizon = int(config.get('horizon', 1))
    input_dim = int(config.get('input_dim', 1))
    loss_func = config.get('loss_func', 'MSE')
    max_grad_norm = float(config.get('max_grad_norm', 5.0))
    num_nodes = int(config.get('num_nodes', 1))
    num_rnn_layers = int(config.get('num_rnn_layers', 1))
    output_dim = int(config.get('output_dim', 1))
    rnn_units = int(config.get('rnn_units'))
    seq_len = int(config.get('seq_len'))
    use_curriculum_learning = bool(
        config.get('use_curriculum_learning', False))
    assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (
        input_dim, output_dim)

    # Input: (batch_size, timesteps, num_sensor, input_dim)
    self._inputs = tf.placeholder(
        tf.float32,
        shape=(batch_size, seq_len, num_nodes, input_dim),
        name='inputs')
    # Labels: (batch_size, timesteps, num_sensor, output_dim)
    self._labels = tf.placeholder(
        tf.float32,
        shape=(batch_size, horizon, num_nodes, output_dim),
        name='labels')

    # All-zero first decoder input.
    go_symbol = tf.zeros(shape=(batch_size, num_nodes * input_dim))

    hidden_cell = DCGRUCell(rnn_units, adj_mx,
                            max_diffusion_step=max_diffusion_step,
                            num_nodes=num_nodes,
                            filter_type=filter_type)
    projection_cell = DCGRUCell(rnn_units, adj_mx,
                                max_diffusion_step=max_diffusion_step,
                                num_nodes=num_nodes,
                                num_proj=output_dim,
                                filter_type=filter_type)
    encoder = tf.contrib.rnn.MultiRNNCell(
        [hidden_cell] * num_rnn_layers, state_is_tuple=True)
    # Only the top decoder layer projects to output_dim.
    decoder = tf.contrib.rnn.MultiRNNCell(
        [hidden_cell] * (num_rnn_layers - 1) + [projection_cell],
        state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()

    # Outputs: (batch_size, timesteps, num_nodes, output_dim)
    with tf.variable_scope('DCRNN_SEQ'):
        encoder_inputs = tf.unstack(
            tf.reshape(self._inputs,
                       (batch_size, seq_len, num_nodes * input_dim)),
            axis=1)
        decoder_inputs = tf.unstack(
            tf.reshape(self._labels,
                       (batch_size, horizon, num_nodes * output_dim)),
            axis=1)
        decoder_inputs.insert(0, go_symbol)

        def _scheduled_sampling(prev, i):
            # Randomly pick between the ground truth and the previous
            # prediction, using the threshold computed from global_step.
            draw = tf.random_uniform((), minval=0, maxval=1.)
            threshold = self._compute_sampling_threshold(
                global_step, cl_decay_steps)
            return tf.cond(tf.less(draw, threshold),
                           lambda: decoder_inputs[i],
                           lambda: prev)

        def _feed_previous(prev, _):
            # Return the output of the model.
            return prev

        # NOTE(review): the source indentation was lost; feeding back the
        # previous prediction is attached to the non-training case here -
        # confirm against the original layout.
        if not is_training:
            loop_function = _feed_previous
        elif use_curriculum_learning:
            loop_function = _scheduled_sampling
        else:
            loop_function = None

        _, enc_state = tf.contrib.rnn.static_rnn(
            encoder, encoder_inputs, dtype=tf.float32)
        decoder_outputs, _ = legacy_seq2seq.rnn_decoder(
            decoder_inputs, enc_state, decoder,
            loop_function=loop_function)

    # The decoder ran horizon + 1 steps (GO symbol included); the last
    # output is dropped before stacking along the time axis.
    stacked = tf.stack(decoder_outputs[:-1], axis=1)
    self._outputs = tf.reshape(
        stacked, (batch_size, horizon, num_nodes, output_dim),
        name='outputs')

    first_channel_preds = self._outputs[..., 0]
    first_channel_labels = self._labels[..., 0]
    null_val = config.get('null_val', 0.)
    self._mae = masked_mae_loss(self._scaler, null_val)(
        preds=first_channel_preds, labels=first_channel_labels)

    # Unknown loss names fall back to the masked MSE.
    loss_factories = {
        'MSE': masked_mse_loss,
        'MAE': masked_mae_loss,
        'RMSE': masked_rmse_loss,
    }
    loss_factory = loss_factories.get(loss_func, masked_mse_loss)
    self._loss = loss_factory(self._scaler, null_val)(
        preds=self._outputs, labels=self._labels)

    if is_training:
        # self._lr is not defined in this constructor; presumably it is
        # provided by the base class - verify.
        optimizer = tf.train.AdamOptimizer(self._lr)
        variables = tf.trainable_variables()
        raw_grads = tf.gradients(self._loss, variables)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, max_grad_norm)
        self._train_op = optimizer.apply_gradients(
            zip(clipped_grads, variables),
            global_step=global_step,
            name='train_op')

    self._merged = tf.summary.merge_all()
def __init__(self, is_training, batch_size, scaler, adj_mx, **model_kwargs):
    """Build the TensorFlow graph of the DCRNN encoder-decoder.

    ``seq_len`` is the length of the input sequence and ``horizon`` the
    length of the predicted sequence. The encoder is unrolled with
    ``static_rnn`` and the decoder with ``legacy_seq2seq.rnn_decoder``,
    both over Python lists of per-step tensors of fixed length.

    Args:
        is_training: selects how the decoder input of each step is chosen.
        batch_size: static batch dimension of every placeholder.
        scaler: stored on ``self._scaler``.
        adj_mx: adjacency matrix handed to every ``DCGRUCell``.
        **model_kwargs: hyper-parameters. ``rnn_units`` and ``seq_len``
            have no default here.
    """
    # Scaler for data normalization.
    self._scaler = scaler
    # Train and loss handles; not populated by this constructor.
    self._loss = self._mae = self._train_op = None

    option = model_kwargs.get
    max_diffusion_step = int(option('max_diffusion_step', 2))
    cl_decay_steps = int(option('cl_decay_steps', 1000))
    filter_type = option('filter_type', 'laplacian')
    horizon = int(option('horizon', 1))
    # Parsed but not referenced again in this constructor.
    max_grad_norm = float(option('max_grad_norm', 5.0))
    num_nodes = int(option('num_nodes', 1))
    num_rnn_layers = int(option('num_rnn_layers', 1))
    rnn_units = int(option('rnn_units'))
    seq_len = int(option('seq_len'))
    use_curriculum_learning = bool(option('use_curriculum_learning', False))
    input_dim = int(option('input_dim', 1))
    output_dim = int(option('output_dim', 1))
    # Number of label channels beyond the predicted ones.
    aux_dim = input_dim - output_dim

    # Input: (batch_size, timesteps, num_sensor, input_dim)
    self._inputs = tf.placeholder(
        tf.float32,
        shape=(batch_size, seq_len, num_nodes, input_dim),
        name='inputs')
    # Labels: (batch_size, timesteps, num_sensor, input_dim), same format
    # as the input except for the temporal dimension.
    self._labels = tf.placeholder(
        tf.float32,
        shape=(batch_size, horizon, num_nodes, input_dim),
        name='labels')

    # All-zero first decoder input, sized with output_dim.
    go_symbol = tf.zeros(shape=(batch_size, num_nodes * output_dim))

    hidden_cell = DCGRUCell(rnn_units, adj_mx,
                            max_diffusion_step=max_diffusion_step,
                            num_nodes=num_nodes,
                            filter_type=filter_type)
    projection_cell = DCGRUCell(rnn_units, adj_mx,
                                max_diffusion_step=max_diffusion_step,
                                num_nodes=num_nodes,
                                num_proj=output_dim,
                                filter_type=filter_type)
    encoder = tf.contrib.rnn.MultiRNNCell(
        [hidden_cell] * num_rnn_layers, state_is_tuple=True)
    decoder = tf.contrib.rnn.MultiRNNCell(
        [hidden_cell] * (num_rnn_layers - 1) + [projection_cell],
        state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()

    # Disabled switch: when enabled (and aux_dim > 0) the auxiliary label
    # channels would be concatenated to every decoder input.
    append_aux_channels = False

    # Outputs: (batch_size, timesteps, num_nodes, output_dim)
    with tf.variable_scope('DCRNN_SEQ'):
        # Split both sequences along the time axis into per-step lists.
        encoder_inputs = tf.unstack(
            tf.reshape(self._inputs,
                       (batch_size, seq_len, num_nodes * input_dim)),
            axis=1)
        decoder_inputs = tf.unstack(
            tf.reshape(self._labels[..., :output_dim],
                       (batch_size, horizon, num_nodes * output_dim)),
            axis=1)
        if aux_dim > 0:
            aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
            # Placeholder entry so indices line up with decoder_inputs
            # once the GO symbol is prepended.
            aux_info.insert(0, None)
        decoder_inputs.insert(0, go_symbol)

        def _loop_function(prev, i):
            if not is_training:
                # Return the prediction of the model in testing.
                next_input = prev
            elif not use_curriculum_learning:
                next_input = decoder_inputs[i]
            else:
                # Scheduled sampling: ground truth or previous prediction.
                draw = tf.random_uniform((), minval=0, maxval=1.)
                threshold = self._compute_sampling_threshold(
                    global_step, cl_decay_steps)
                next_input = tf.cond(tf.less(draw, threshold),
                                     lambda: decoder_inputs[i],
                                     lambda: prev)
            if append_aux_channels and aux_dim > 0:
                next_input = tf.reshape(
                    next_input, (batch_size, num_nodes, output_dim))
                next_input = tf.concat([next_input, aux_info[i]], axis=-1)
                next_input = tf.reshape(
                    next_input, (batch_size, num_nodes * input_dim))
            return next_input

        _, enc_state = tf.contrib.rnn.static_rnn(
            encoder, encoder_inputs, dtype=tf.float32)
        decoder_outputs, _ = legacy_seq2seq.rnn_decoder(
            decoder_inputs, enc_state, decoder,
            loop_function=_loop_function)

    # The decoder ran horizon + 1 steps (GO symbol included); the last
    # output is dropped before stacking along the time axis.
    stacked = tf.stack(decoder_outputs[:-1], axis=1)
    self._outputs = tf.reshape(
        stacked, (batch_size, horizon, num_nodes, output_dim),
        name='outputs')
    self._merged = tf.summary.merge_all()
def __init__(self, args, is_training, batch_size, scaler, adj_mx, **model_kwargs):
    """Build the TensorFlow graph of this DCRNN variant.

    Uses the project helpers ``rnn_encoder`` / ``rnn_decoder`` instead of
    the stock TensorFlow unrolling, and supports two optional behaviours
    selected through ``model_kwargs``:

    * ``pred_without_zero_input``: no GO symbol is prepended to the decoder
      inputs; the first prediction comes from ``get_first_output`` applied
      to the encoder output.
    * ``residuals`` (only when ``pred_without_zero_input`` is falsy): each
      stacked output is summed with a "base" sequence made of the last
      encoder input followed by the earlier raw decoder outputs.

    Args:
        args: forwarded as first positional argument to every DCGRUCell.
        is_training: selects how the decoder input of each step is chosen.
        batch_size: static batch dimension of placeholders and states.
        scaler: stored on ``self._scaler``.
        adj_mx: adjacency matrix handed to every DCGRUCell.
        **model_kwargs: hyper-parameters; ``rnn_units`` and ``seq_len``
            have no default here.
    """
    # Scaler for data normalization.
    self._scaler = scaler
    # Train and loss
    self._loss = None
    self._mae = None
    self._train_op = None
    max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
    diffusion_with_graph_kernel = model_kwargs.get(
        'diffusion_with_graph_kernel', False)
    graph_kernel_mode = model_kwargs.get('graph_kernel_mode', 'local')
    cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
    filter_type = model_kwargs.get('filter_type', 'laplacian')
    horizon = int(model_kwargs.get('horizon', 1))
    # Parsed but not referenced again in this constructor.
    max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
    num_nodes = int(model_kwargs.get('num_nodes', 1))
    num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
    rnn_units = int(model_kwargs.get('rnn_units'))
    seq_len = int(model_kwargs.get('seq_len'))
    use_curriculum_learning = bool(
        model_kwargs.get('use_curriculum_learning', False))
    input_dim = int(model_kwargs.get('input_dim', 1))
    output_dim = int(model_kwargs.get('output_dim', 1))
    # The options below have no default: a missing key yields None.
    pred_without_zero_input = model_kwargs.get('pred_without_zero_input')
    squeeze_and_excitation = model_kwargs.get('squeeze_and_excitation')
    se_activate = model_kwargs.get('se_activate')
    excitation_rate = model_kwargs.get('excitation_rate')
    r = model_kwargs.get('r')
    residuals = model_kwargs.get('residuals')
    cell_forward_mode = model_kwargs.get('cell_forward_mode')
    with_inputs_diffusion = model_kwargs.get('with_inputs_diffusion')
    with_inputs_channel_wise_attention = model_kwargs.get(
        'with_inputs_channel_wise_attention')
    attention_mode = model_kwargs.get('attention_mode')
    # Number of label channels beyond the predicted ones.
    aux_dim = input_dim - output_dim
    # Input (batch_size, timesteps, num_sensor, input_dim)
    self._inputs = tf.placeholder(tf.float32,
                                  shape=(batch_size, seq_len, num_nodes,
                                         input_dim),
                                  name='inputs')
    # Labels: (batch_size, timesteps, num_sensor, input_dim), same format with input except the temporal dimension.
    self._labels = tf.placeholder(tf.float32,
                                  shape=(batch_size, horizon, num_nodes,
                                         input_dim),
                                  name='labels')
    # GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))
    # All-zero first decoder input, sized with output_dim.
    GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * output_dim))
    # NOTE(review): exactly two zero states are created regardless of
    # num_rnn_layers - confirm this is what rnn_encoder expects.
    init_state = [
        tf.zeros(shape=(batch_size, num_nodes * rnn_units)),
        tf.zeros(shape=(batch_size, num_nodes * rnn_units))
    ]
    cell = DCGRUCell(args,
                     rnn_units,
                     adj_mx,
                     squeeze_and_excitation,
                     se_activate,
                     excitation_rate,
                     r,
                     diffusion_with_graph_kernel,
                     graph_kernel_mode,
                     cell_forward_mode,
                     max_diffusion_step=max_diffusion_step,
                     num_nodes=num_nodes,
                     filter_type=filter_type,
                     use_gc_for_ru=True)
    # Same settings as `cell`, plus a projection to output_dim.
    cell_with_projection = DCGRUCell(args,
                                     rnn_units,
                                     adj_mx,
                                     squeeze_and_excitation,
                                     se_activate,
                                     excitation_rate,
                                     r,
                                     diffusion_with_graph_kernel,
                                     graph_kernel_mode,
                                     cell_forward_mode,
                                     max_diffusion_step=max_diffusion_step,
                                     num_nodes=num_nodes,
                                     num_proj=output_dim,
                                     filter_type=filter_type,
                                     use_gc_for_ru=True)
    # `test_cells` takes the role of the encoder stack (see the
    # commented-out `encoding_cells` lines it replaced).
    test_cells = [cell] * num_rnn_layers
    #encoding_cells = [cell] * num_rnn_layers
    decoding_cells = [cell] * (num_rnn_layers - 1) + [cell_with_projection]
    #encoding_cells = tf.contrib.rnn.MultiRNNCell(encoding_cells, state_is_tuple=True)
    test_cells = tf.contrib.rnn.MultiRNNCell(test_cells,
                                             state_is_tuple=True)
    decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                 state_is_tuple=True)
    global_step = tf.train.get_or_create_global_step()
    # Outputs: (batch_size, timesteps, num_nodes, output_dim)
    with tf.variable_scope('DCRNN_SEQ'):
        # Per-step lists obtained by splitting along the time axis.
        inputs = tf.unstack(tf.reshape(
            self._inputs, (batch_size, seq_len, num_nodes * input_dim)),
                            axis=1)
        labels = tf.unstack(
            tf.reshape(self._labels[..., :output_dim],
                       (batch_size, horizon, num_nodes * output_dim)),
            axis=1)
        if aux_dim > 0:
            aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
            aux_info.insert(0, None)
        # The GO symbol is only prepended in the default mode; `labels`
        # then holds horizon + 1 entries instead of horizon.
        if not pred_without_zero_input:
            labels.insert(0, GO_SYMBOL)

        def _loop_function(prev, i):
            if is_training:
                # Return either the model's prediction or the previous ground truth in training.
                if use_curriculum_learning:
                    c = tf.random_uniform((), minval=0, maxval=1.)
                    threshold = self._compute_sampling_threshold(
                        global_step, cl_decay_steps)
                    result = tf.cond(tf.less(c, threshold),
                                     lambda: labels[i], lambda: prev)
                else:
                    result = labels[i]
            else:
                # Return the prediction of the model in testing.
                result = prev
            # Disabled by the leading `False`: would append the auxiliary
            # label channels to the decoder input.
            if False and aux_dim > 0:
                result = tf.reshape(result,
                                    (batch_size, num_nodes, output_dim))
                result = tf.concat([result, aux_info[i]], axis=-1)
                result = tf.reshape(result,
                                    (batch_size, num_nodes * input_dim))
            return result

        output, state = rnn_encoder(inputs, init_state, test_cells, cell,
                                    with_inputs_diffusion,
                                    with_inputs_channel_wise_attention,
                                    attention_mode, r)
        if pred_without_zero_input:
            # First prediction is derived from the encoder output and put
            # in front of the decoder outputs.
            first_output = get_first_output(num_nodes, rnn_units,
                                            output_dim, output)
            outputs, final_state = rnn_decoder(
                labels,
                state,
                decoding_cells,
                cell,
                with_inputs_diffusion,
                with_inputs_channel_wise_attention,
                attention_mode,
                r,
                loop_function=_loop_function,
            )
            outputs.insert(0, first_output)
        else:
            #_, enc_state = tf.contrib.rnn.static_rnn(encoding_cells, inputs, dtype=tf.float32)
            outputs, final_state = rnn_decoder(
                labels,
                state,
                decoding_cells,
                cell,
                with_inputs_diffusion,
                with_inputs_channel_wise_attention,
                attention_mode,
                r,
                loop_function=_loop_function)
    # Project the output to output_dim.
    if residuals and not pred_without_zero_input:
        # Base sequence: the first output_dim channels of the last encoder
        # input, followed by all raw decoder outputs except the last two.
        first_base_outputs = tf.reshape(
            tf.reshape(
                inputs[-1],
                (batch_size, num_nodes, input_dim))[..., :output_dim],
            (batch_size, num_nodes * output_dim))
        base_outputs = outputs[:-2]
        base_outputs.insert(0, first_base_outputs)
        base_outputs = tf.stack(base_outputs, axis=1)
        # The last decoder output is dropped; the base is then added.
        outputs = tf.stack(outputs[:-1], axis=1)
        outputs += base_outputs
    else:
        # The last list entry is dropped before stacking along time.
        outputs = tf.stack(outputs[:-1], axis=1)
    self._outputs = tf.reshape(
        outputs, (batch_size, horizon, num_nodes, output_dim),
        name='outputs')
    self._merged = tf.summary.merge_all()
def __init__(self, is_training, batch_size, scaler, adj_mx, **model_kwargs):
    """Build the TensorFlow graph of the DCRNN encoder-decoder.

    In this variant the ``labels`` placeholder carries ``output_dim``
    channels, and no gradient-clipping option is read.

    Args:
        is_training: selects how the decoder input of each step is chosen.
        batch_size: static batch dimension of every placeholder.
        scaler: stored on ``self._scaler``.
        adj_mx: adjacency matrix handed to every ``DCGRUCell``.
        **model_kwargs: hyper-parameters. ``rnn_units`` and ``seq_len``
            have no default here.
    """
    # Scaler for data normalization.
    self._scaler = scaler
    # Train and loss handles; not populated by this constructor.
    self._loss = self._mae = self._train_op = None

    max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
    cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
    filter_type = model_kwargs.get('filter_type', 'laplacian')
    horizon = int(model_kwargs.get('horizon', 1))
    num_nodes = int(model_kwargs.get('num_nodes', 1))
    num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
    rnn_units = int(model_kwargs.get('rnn_units'))
    seq_len = int(model_kwargs.get('seq_len'))
    use_curriculum_learning = bool(
        model_kwargs.get('use_curriculum_learning', False))
    input_dim = int(model_kwargs.get('input_dim', 1))
    output_dim = int(model_kwargs.get('output_dim', 1))

    # Input: (batch_size, timesteps, num_sensor, input_dim)
    input_shape = (batch_size, seq_len, num_nodes, input_dim)
    self._inputs = tf.placeholder(tf.float32, shape=input_shape,
                                  name='inputs')
    # Labels: (batch_size, timesteps, num_sensor, output_dim)
    label_shape = (batch_size, horizon, num_nodes, output_dim)
    self._labels = tf.placeholder(tf.float32, shape=label_shape,
                                  name='labels')

    # All-zero first decoder input, sized with output_dim.
    go_symbol = tf.zeros(shape=(batch_size, num_nodes * output_dim))

    cell_kwargs = dict(max_diffusion_step=max_diffusion_step,
                       num_nodes=num_nodes,
                       filter_type=filter_type)
    plain_cell = DCGRUCell(rnn_units, adj_mx, **cell_kwargs)
    projecting_cell = DCGRUCell(rnn_units, adj_mx, num_proj=output_dim,
                                **cell_kwargs)
    encoder_stack = tf.contrib.rnn.MultiRNNCell(
        [plain_cell] * num_rnn_layers, state_is_tuple=True)
    decoder_stack = tf.contrib.rnn.MultiRNNCell(
        [plain_cell] * (num_rnn_layers - 1) + [projecting_cell],
        state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()

    # Outputs: (batch_size, timesteps, num_nodes, output_dim)
    with tf.variable_scope('DCRNN_SEQ'):
        step_inputs = tf.unstack(
            tf.reshape(self._inputs,
                       (batch_size, seq_len, num_nodes * input_dim)),
            axis=1)
        step_labels = tf.unstack(
            tf.reshape(self._labels[..., :output_dim],
                       (batch_size, horizon, num_nodes * output_dim)),
            axis=1)
        step_labels.insert(0, go_symbol)

        def _loop_function(prev, i):
            if not is_training:
                # Return the prediction of the model in testing.
                return prev
            if not use_curriculum_learning:
                return step_labels[i]
            # Return either the model's prediction or the ground truth,
            # chosen at random against the sampling threshold.
            draw = tf.random_uniform((), minval=0, maxval=1.)
            threshold = self._compute_sampling_threshold(
                global_step, cl_decay_steps)
            return tf.cond(tf.less(draw, threshold),
                           lambda: step_labels[i],
                           lambda: prev)

        _, enc_state = tf.contrib.rnn.static_rnn(
            encoder_stack, step_inputs, dtype=tf.float32)
        step_outputs, _ = legacy_seq2seq.rnn_decoder(
            step_labels, enc_state, decoder_stack,
            loop_function=_loop_function)

    # The decoder ran horizon + 1 steps (GO symbol included); the last
    # output is dropped before stacking along the time axis.
    stacked = tf.stack(step_outputs[:-1], axis=1)
    self._outputs = tf.reshape(
        stacked, (batch_size, horizon, num_nodes, output_dim),
        name='outputs')
    self._merged = tf.summary.merge_all()
def __init__(self, is_training, batch_size, scaler, adj_mx, **model_kwargs):
    """Build the TensorFlow graph of the single-stack DCRNN variant.

    Inputs and labels are concatenated along the time axis and a single
    ``MultiRNNCell`` is unrolled over all ``seq_len + horizon`` steps.
    For steps inside the input window the recorded step is always fed;
    afterwards the fed value is the recorded step or the previous
    prediction joined with feature channel 1 of the recorded step,
    depending on ``is_training`` and ``use_curriculum_learning``.

    The zero initial state is sized from ``batch_size``, ``num_nodes`` and
    ``rnn_units`` so the model is not tied to one particular
    configuration.

    Args:
        is_training: selects the feeding strategy after the input window.
        batch_size: static batch dimension of placeholders and states.
        scaler: stored on ``self._scaler``.
        adj_mx: adjacency matrix handed to every ``DCGRUCell``.
        **model_kwargs: hyper-parameters. ``rnn_units`` and ``seq_len``
            have no default here.
    """
    # Scaler for data normalization.
    self._scaler = scaler
    # Train and loss handles; not populated by this constructor.
    self._loss = None
    self._mae = None
    self._train_op = None

    max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
    cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 2000))
    filter_type = model_kwargs.get('filter_type', 'laplacian')
    horizon = int(model_kwargs.get('horizon', 1))
    num_nodes = int(model_kwargs.get('num_nodes', 1))
    num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
    rnn_units = int(model_kwargs.get('rnn_units'))
    seq_len = int(model_kwargs.get('seq_len'))
    use_curriculum_learning = bool(
        model_kwargs.get('use_curriculum_learning', False))
    input_dim = int(model_kwargs.get('input_dim', 1))
    output_dim = 1

    self._inputs = tf.placeholder(
        tf.float32,
        shape=(batch_size, seq_len, num_nodes, input_dim),
        name='inputs')
    self._labels = tf.placeholder(
        tf.float32,
        shape=(batch_size, horizon, num_nodes, input_dim),
        name='labels')
    # Full sequence: input window followed by the label window.
    self.train_inputs = tf.concat((self._inputs, self._labels), axis=1)
    # NOTE(review): the slice starts at time step 0, while the stacked
    # outputs are produced from steps 0 .. T-2 - confirm the intended
    # alignment with whatever consumes `_targets`.
    self._targets = tf.slice(
        self.train_inputs, [0, 0, 0, 0],
        [batch_size, horizon + seq_len - 1, num_nodes, 1],
        name='targets')

    cell_1st_layer = DCGRUCell(rnn_units, adj_mx, first_layer=True,
                               max_diffusion_step=max_diffusion_step,
                               num_nodes=num_nodes,
                               filter_type=filter_type)
    cell = DCGRUCell(rnn_units, adj_mx,
                     max_diffusion_step=max_diffusion_step,
                     num_nodes=num_nodes,
                     filter_type=filter_type)
    cell_with_projection = DCGRUCell(rnn_units, adj_mx,
                                     max_diffusion_step=max_diffusion_step,
                                     num_nodes=num_nodes,
                                     num_proj=output_dim,
                                     filter_type=filter_type)
    decoding_cells = ([cell_1st_layer] + [cell] * (num_rnn_layers - 2) +
                      [cell_with_projection])
    decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                 state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()

    with tf.variable_scope('DCRNN_SEQ'):
        # One (batch, node, feature) tensor per time step.
        train_inputs = tf.unstack(self.train_inputs, axis=1)

        def _loop_function(prev, i):
            # TODO: make the probability of using the previous prediction
            # increase towards the end of the sequence.
            train_input = train_inputs[i]
            # Each step tensor is rank 3 because it comes from unstacking
            # a rank-4 tensor, so the slice is always valid. Channel 1 is
            # presumably the time feature (judging by the name) - verify.
            time_input = tf.slice(
                train_input, [0, 0, 1],
                [train_input.shape[0], train_input.shape[1], 1])
            if is_training:
                if use_curriculum_learning:
                    c = tf.random_uniform((), minval=0.0, maxval=1.0)
                    threshold = self._compute_sampling_threshold(
                        global_step, cl_decay_steps)
                    if i < seq_len:
                        result = train_input
                    else:
                        result = tf.cond(
                            tf.less(c, threshold),
                            lambda: train_inputs[i],
                            lambda: tf.concat([prev, time_input], axis=-1))
                else:
                    result = train_inputs[i]
            else:
                if i < seq_len:
                    result = train_inputs[i]
                else:
                    result = tf.concat([prev, time_input], axis=-1)
            return result

        # One zero state per layer, sized from the configuration instead
        # of fixed literals.
        state = [
            tf.zeros(shape=(batch_size, num_nodes * rnn_units))
            for _ in range(num_rnn_layers)
        ]
        outputs = []
        prev = None
        for i, inp in enumerate(train_inputs):
            with tf.variable_scope("loop_function", reuse=True):
                if prev is not None:
                    inp = _loop_function(prev, i)
            if i > 0:
                # Share the cell variables across all unrolled steps.
                tf.get_variable_scope().reuse_variables()
            output, state = decoding_cells(inp, state)
            output = tf.reshape(output, (batch_size, num_nodes, output_dim))
            outputs.append(output)
            prev = output

    # The output of the final step is dropped before stacking over time.
    outputs = tf.stack(outputs[:-1], axis=1)
    self._outputs = tf.reshape(
        outputs,
        (batch_size, horizon + seq_len - 1, num_nodes, output_dim),
        name='outputs')
    self._merged = tf.summary.merge_all()
def __init__(self, is_training, batch_size, scaler, adj_matrix_file,
             **model_kwargs):
    """Build the TensorFlow graph of the DCRNN encoder-decoder.

    The adjacency matrix is loaded from ``adj_matrix_file`` with
    ``load_graph_data``. The ``network`` option is forwarded to every
    ``DCGRUCell`` as ``network_type``; the ``graphEmbedFile`` option is
    only read when ``network`` equals ``'fc'``.

    Args:
        is_training: selects how the decoder input of each step is chosen.
        batch_size: static batch dimension of every placeholder.
        scaler: stored on ``self._scaler``.
        adj_matrix_file: path handed to ``load_graph_data``.
        **model_kwargs: hyper-parameters. ``rnn_units`` and ``seq_len``
            have no default here.
    """
    # Scaler for data normalization.
    self._scaler = scaler
    # Train and loss handles; not populated by this constructor.
    self._loss = self._mae = self._train_op = None

    option = model_kwargs.get
    max_diffusion_step = int(option('max_diffusion_step', 0))
    cl_decay_steps = int(option('cl_decay_steps', 1000))
    filter_type = option('filter_type', 'laplacian')
    network_type = option('network', 'gconv')  # fc/gconv
    # The next two options are read but not used in this constructor.
    matrix_type = option('weightMatrix')  # a/d
    attention = option('attention')
    horizon = int(option('horizon', 1))
    # Parsed but not referenced again in this constructor.
    max_grad_norm = float(option('max_grad_norm', 5.0))
    num_nodes = int(option('num_nodes', 1))
    num_rnn_layers = int(option('num_rnn_layers', 1))
    rnn_units = int(option('rnn_units'))
    seq_len = int(option('seq_len'))
    use_curriculum_learning = bool(option('use_curriculum_learning', False))
    input_dim = int(option('input_dim', 1))
    output_dim = int(option('output_dim', 1))
    # Number of label channels beyond the predicted ones.
    aux_dim = input_dim - output_dim

    _, _, adj_mx = load_graph_data(adj_matrix_file)
    graph_embed_file = (option('graphEmbedFile')
                        if network_type == 'fc' else None)

    # Input: (batch_size, timesteps, num_sensor, input_dim)
    self._inputs = tf.placeholder(
        tf.float32,
        shape=(batch_size, seq_len, num_nodes, input_dim),
        name='inputs')
    # Labels: (batch_size, timesteps, num_sensor, input_dim), same format
    # as the input except for the temporal dimension.
    self._labels = tf.placeholder(
        tf.float32,
        shape=(batch_size, horizon, num_nodes, input_dim),
        name='labels')

    # All-zero first decoder input, sized with output_dim.
    go_symbol = tf.zeros(shape=(batch_size, num_nodes * output_dim))

    cell_kwargs = dict(max_diffusion_step=max_diffusion_step,
                       num_nodes=num_nodes,
                       network_type=network_type,
                       graphEmbedFile=graph_embed_file,
                       filter_type=filter_type)
    plain_cell = DCGRUCell(rnn_units, adj_mx, **cell_kwargs)
    projecting_cell = DCGRUCell(rnn_units, adj_mx, num_proj=output_dim,
                                **cell_kwargs)
    encoder_stack = tf.contrib.rnn.MultiRNNCell(
        [plain_cell] * num_rnn_layers, state_is_tuple=True)
    # The projection is applied by the last layer of the decoder only.
    decoder_stack = tf.contrib.rnn.MultiRNNCell(
        [plain_cell] * (num_rnn_layers - 1) + [projecting_cell],
        state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()

    # Disabled switch: when enabled (and aux_dim > 0) the auxiliary label
    # channels would be concatenated to every decoder input.
    append_aux_channels = False

    # Outputs: (batch_size, timesteps, num_nodes, output_dim)
    with tf.variable_scope('DCRNN_SEQ'):
        step_inputs = tf.unstack(
            tf.reshape(self._inputs,
                       (batch_size, seq_len, num_nodes * input_dim)),
            axis=1)
        # The labels are the ground truth; only the first output_dim
        # channels are fed to the decoder.
        step_labels = tf.unstack(
            tf.reshape(self._labels[..., :output_dim],
                       (batch_size, horizon, num_nodes * output_dim)),
            axis=1)
        if aux_dim > 0:
            aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
            aux_info.insert(0, None)
        step_labels.insert(0, go_symbol)

        def _loop_function(prev, i):
            if not is_training:
                # Return the prediction of the model in testing.
                next_input = prev
            elif not use_curriculum_learning:
                next_input = step_labels[i]
            else:
                # Return either the model's prediction or the ground
                # truth, chosen at random against the threshold.
                draw = tf.random_uniform((), minval=0, maxval=1.)
                threshold = self._compute_sampling_threshold(
                    global_step, cl_decay_steps)
                next_input = tf.cond(tf.less(draw, threshold),
                                     lambda: step_labels[i],
                                     lambda: prev)
            if append_aux_channels and aux_dim > 0:
                next_input = tf.reshape(
                    next_input, (batch_size, num_nodes, output_dim))
                next_input = tf.concat([next_input, aux_info[i]], axis=-1)
                next_input = tf.reshape(
                    next_input, (batch_size, num_nodes * input_dim))
            return next_input

        # Encoder: a static RNN over the input steps; only its final
        # state is kept and handed to the decoder.
        _, enc_state = tf.contrib.rnn.static_rnn(
            encoder_stack, step_inputs, dtype=tf.float32)
        # Decoder: one output per entry of step_labels.
        step_outputs, _ = legacy_seq2seq.rnn_decoder(
            step_labels, enc_state, decoder_stack,
            loop_function=_loop_function)

    # The decoder ran horizon + 1 steps (GO symbol included); the last
    # output is dropped before stacking the list into a single tensor.
    stacked = tf.stack(step_outputs[:-1], axis=1)
    self._outputs = tf.reshape(
        stacked, (batch_size, horizon, num_nodes, output_dim),
        name='outputs')
    self._merged = tf.summary.merge_all()
def __init__(self, num_station, input_steps, num_layers=2, num_units=64,
             max_diffusion_step=2, dy_adj=1, dy_filter=0, f_adj_mx=None,
             trained_adj_mx=False, filter_type='dual_random_walk',
             batch_size=32):
    """Store the hyper-parameters and create cells and placeholders.

    Builds a ``MultiRNNCell`` from a first cell (``input_dim=2``),
    ``num_layers - 2`` hidden cells when ``num_layers`` exceeds 2, and a
    final cell with ``num_proj=2``. Then creates the ``x``, ``f`` and ``y``
    placeholders.

    Args:
        num_station: number of graph nodes.
        input_steps: time dimension of all three placeholders.
        num_layers: requested depth; values of 2 or less give two cells.
        num_units: units of every cell.
        max_diffusion_step: forwarded to every cell.
        dy_adj: forwarded to every cell as ``dy_adj``, and as
            ``output_dy_adj`` to all but the last cell.
        dy_filter: forwarded to the first cell only; other cells get 0.
        f_adj_mx: adjacency matrix used unless ``trained_adj_mx`` is set.
        trained_adj_mx: when truthy, a ``(num_station, num_station)``
            variable named ``adj_mx`` is used as adjacency matrix.
        filter_type: forwarded to every cell.
        batch_size: static batch dimension of the placeholders.
    """
    self.num_station = num_station
    self.input_steps = input_steps
    self.num_units = num_units
    self.max_diffusion_step = max_diffusion_step
    self.dy_adj = dy_adj
    self.dy_filter = dy_filter
    self.f_adj_mx = f_adj_mx
    self.filter_type = filter_type
    self.batch_size = batch_size
    self.weight_initializer = tf.contrib.layers.xavier_initializer()
    self.const_initializer = tf.constant_initializer()

    adj_mx = self.f_adj_mx
    if trained_adj_mx:
        # Replace the fixed matrix by a variable with Xavier init.
        with tf.variable_scope('trained_adj_mx', reuse=tf.AUTO_REUSE):
            adj_mx = tf.get_variable(
                'adj_mx',
                [self.num_station, self.num_station],
                dtype=tf.float32,
                initializer=self.weight_initializer)

    # Settings common to all three kinds of cell.
    common = dict(adj_mx=adj_mx,
                  max_diffusion_step=self.max_diffusion_step,
                  num_nodes=self.num_station,
                  dy_adj=self.dy_adj,
                  filter_type=self.filter_type)
    first_cell = DCGRUCell(self.num_units, num_proj=None, input_dim=2,
                           dy_filter=self.dy_filter,
                           output_dy_adj=self.dy_adj, **common)
    cell = DCGRUCell(self.num_units, num_proj=None,
                     input_dim=self.num_units, dy_filter=0,
                     output_dy_adj=self.dy_adj, **common)
    cell_with_projection = DCGRUCell(self.num_units, num_proj=2,
                                     input_dim=self.num_units, dy_filter=0,
                                     output_dy_adj=False, **common)

    # Hidden layers exist only for stacks deeper than two.
    hidden_cells = [cell] * (num_layers - 2) if num_layers > 2 else []
    cells = [first_cell] + hidden_cells + [cell_with_projection]
    self.cells = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

    leading_dims = [self.batch_size, self.input_steps, self.num_station]
    self.x = tf.placeholder(tf.float32, leading_dims + [2])
    self.f = tf.placeholder(tf.float32, leading_dims + [self.num_station])
    self.y = tf.placeholder(tf.float32, leading_dims + [2])