Example #1
0
    def __init__(self, input_dim, adj_mat, max_diffusion_step, num_nodes,
                 hid_dim, output_dim, num_rnn_layers):
        """Build the stack of DCGRU cells used by the decoder.

        The stack consists of one input cell (``input_dim`` -> ``hid_dim``),
        ``num_rnn_layers - 2`` hidden cells (``hid_dim`` -> ``hid_dim``) and
        one final cell constructed with ``num_proj=output_dim``. Every layer
        gets its own ``DCGRUCell`` instance, so no two layers share
        parameters.

        Note that the input cell and the projecting cell are always created,
        so the stack holds two cells even when ``num_rnn_layers`` is 1.

        Args:
            input_dim: Feature width fed to the first cell.
            adj_mat: Adjacency matrix forwarded to every cell.
            max_diffusion_step: Forwarded to every cell.
            num_nodes: Number of graph nodes, forwarded to every cell.
            hid_dim: Number of units of every cell, and the input width of
                every cell after the first.
            output_dim: Passed as ``num_proj`` to the last cell.
            num_rnn_layers: Requested depth of the stack.
        """
        super(DCGRUDecoder, self).__init__()
        self.hid_dim = hid_dim
        self._num_nodes = num_nodes  # original author's note: 207
        self._output_dim = output_dim  # original author's note: should be 1
        self._num_rnn_layers = num_rnn_layers

        def _new_cell(cell_input_dim, **extra):
            # Single construction site so all layers agree on graph settings.
            return DCGRUCell(input_dim=cell_input_dim,
                             num_units=hid_dim,
                             adj_mat=adj_mat,
                             max_diffusion_step=max_diffusion_step,
                             num_nodes=num_nodes,
                             **extra)

        # First layer of the decoder: the only one that sees input_dim.
        decoding_cells = [_new_cell(input_dim)]
        # Hidden layers. A fresh cell is built per layer: appending one shared
        # instance repeatedly would register the same module several times
        # and silently tie the weights of all hidden layers together.
        decoding_cells.extend(
            _new_cell(hid_dim) for _ in range(1, num_rnn_layers - 1))
        # Last layer carries the projection to output_dim.
        decoding_cells.append(_new_cell(hid_dim, num_proj=output_dim))
        self.decoding_cells = nn.ModuleList(decoding_cells)
Example #2
0
    def __init__(self, input_dim, adj_mat, max_diffusion_step, hid_dim,
                 num_nodes, num_rnn_layers):
        """Create the encoder's stack of ``DCGRUCell`` layers.

        Args:
            input_dim: Feature width consumed by the first layer.
            adj_mat: Adjacency matrix forwarded to every cell.
            max_diffusion_step: Forwarded to every cell.
            hid_dim: Number of units of every cell, and the input width of
                every layer after the first.
            num_nodes: Number of graph nodes, forwarded to every cell.
            num_rnn_layers: Depth of the stack; at least one cell is always
                created.
        """
        super(DCRNNEncoder, self).__init__()
        self.hid_dim = hid_dim
        self._num_rnn_layers = num_rnn_layers

        # Only the first layer reads the raw features; each deeper layer
        # reads the hid_dim-wide output of the layer below it.
        layer_input_dims = [input_dim] + [hid_dim] * (num_rnn_layers - 1)

        self.encoding_cells = nn.ModuleList([
            DCGRUCell(input_dim=layer_input_dim,
                      num_units=hid_dim,
                      adj_mat=adj_mat,
                      max_diffusion_step=max_diffusion_step,
                      num_nodes=num_nodes)
            for layer_input_dim in layer_input_dims
        ])
Example #3
0
    def __init__(self, is_training, config, scaler=None, adj_mx=None):
        """Build the DCRNN encoder-decoder graph together with its losses.

        Creates the ``inputs``/``labels`` placeholders, runs a multi-layer
        DCGRU encoder over the input steps, decodes with
        ``legacy_seq2seq.rnn_decoder`` and stores the prediction tensor, the
        masked MAE, the configured loss and (when training) the train op on
        the instance.

        Args:
            is_training: When true, the decoder is fed the ground truth
                (mixed with its own predictions if curriculum learning is
                enabled) and an Adam train op is created. When false, the
                decoder is fed its own previous output.
            config: Mapping read through ``config.get``. ``batch_size``,
                ``rnn_units`` and ``seq_len`` have no default here; the other
                keys fall back to the defaults visible below.
            scaler: Forwarded to the base-class constructor.
            adj_mx: Adjacency matrix handed to every ``DCGRUCell``.
        """
        super(DCRNNModel, self).__init__(config, scaler=scaler)

        # ---- hyper-parameters -------------------------------------------
        batch_size = int(config.get('batch_size'))
        max_diffusion_step = int(config.get('max_diffusion_step', 2))
        cl_decay_steps = int(config.get('cl_decay_steps', 1000))
        filter_type = config.get('filter_type', 'laplacian')
        horizon = int(config.get('horizon', 1))
        input_dim = int(config.get('input_dim', 1))
        loss_func = config.get('loss_func', 'MSE')
        max_grad_norm = float(config.get('max_grad_norm', 5.0))
        num_nodes = int(config.get('num_nodes', 1))
        num_rnn_layers = int(config.get('num_rnn_layers', 1))
        output_dim = int(config.get('output_dim', 1))
        rnn_units = int(config.get('rnn_units'))
        seq_len = int(config.get('seq_len'))
        use_curriculum_learning = bool(
            config.get('use_curriculum_learning', False))

        # The decoder's output is fed back as its next input, so both widths
        # have to agree.
        assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (
            input_dim, output_dim)

        flat_input_dim = num_nodes * input_dim
        flat_output_dim = num_nodes * output_dim

        # ---- placeholders -----------------------------------------------
        # inputs: (batch_size, seq_len, num_nodes, input_dim)
        input_shape = (batch_size, seq_len, num_nodes, input_dim)
        self._inputs = tf.placeholder(tf.float32,
                                      shape=input_shape,
                                      name='inputs')
        # labels: (batch_size, horizon, num_nodes, output_dim)
        label_shape = (batch_size, horizon, num_nodes, output_dim)
        self._labels = tf.placeholder(tf.float32,
                                      shape=label_shape,
                                      name='labels')
        # All-zero first decoder input.
        go_symbol = tf.zeros(shape=(batch_size, flat_input_dim))

        # ---- recurrent cells --------------------------------------------
        shared_cell_kwargs = dict(max_diffusion_step=max_diffusion_step,
                                  num_nodes=num_nodes,
                                  filter_type=filter_type)
        plain_cell = DCGRUCell(rnn_units, adj_mx, **shared_cell_kwargs)
        projecting_cell = DCGRUCell(rnn_units,
                                    adj_mx,
                                    num_proj=output_dim,
                                    **shared_cell_kwargs)
        encoder_layers = [plain_cell] * num_rnn_layers
        decoder_layers = [plain_cell] * (num_rnn_layers - 1)
        decoder_layers = decoder_layers + [projecting_cell]
        encoder = tf.contrib.rnn.MultiRNNCell(encoder_layers,
                                              state_is_tuple=True)
        decoder = tf.contrib.rnn.MultiRNNCell(decoder_layers,
                                              state_is_tuple=True)

        global_step = tf.train.get_or_create_global_step()

        # ---- encoder / decoder ------------------------------------------
        with tf.variable_scope('DCRNN_SEQ'):
            flat_inputs = tf.reshape(self._inputs,
                                     (batch_size, seq_len, flat_input_dim))
            encoder_inputs = tf.unstack(flat_inputs, axis=1)
            flat_labels = tf.reshape(self._labels,
                                     (batch_size, horizon, flat_output_dim))
            decoder_inputs = tf.unstack(flat_labels, axis=1)
            decoder_inputs.insert(0, go_symbol)

            def _scheduled_sampling(prev, i):
                # Randomly pick the ground truth or the previous prediction;
                # the threshold is computed from the global step.
                c = tf.random_uniform((), minval=0, maxval=1.)
                threshold = self._compute_sampling_threshold(
                    global_step, cl_decay_steps)
                return tf.cond(tf.less(c, threshold),
                               lambda: decoder_inputs[i], lambda: prev)

            def _feed_previous(prev, _):
                # Return the output of the model.
                return prev

            if not is_training:
                loop_function = _feed_previous
            elif use_curriculum_learning:
                loop_function = _scheduled_sampling
            else:
                # No loop function: rnn_decoder consumes decoder_inputs as
                # given.
                loop_function = None

            _, enc_state = tf.contrib.rnn.static_rnn(encoder,
                                                     encoder_inputs,
                                                     dtype=tf.float32)
            step_outputs, _ = legacy_seq2seq.rnn_decoder(
                decoder_inputs,
                enc_state,
                decoder,
                loop_function=loop_function)

        # The decoder ran horizon + 1 steps (GO symbol included); the last
        # step's output is dropped.
        stacked = tf.stack(step_outputs[:-1], axis=1)
        self._outputs = tf.reshape(stacked,
                                   (batch_size, horizon, num_nodes,
                                    output_dim),
                                   name='outputs')

        # ---- metrics and loss -------------------------------------------
        preds = self._outputs[..., 0]
        labels = self._labels[..., 0]

        null_val = config.get('null_val', 0.)
        mae_fn = masked_mae_loss(self._scaler, null_val)
        self._mae = mae_fn(preds=preds, labels=labels)

        # 'MSE' and any unrecognised name both resolve to the masked MSE.
        if loss_func == 'MAE':
            make_loss = masked_mae_loss
        elif loss_func == 'RMSE':
            make_loss = masked_rmse_loss
        else:
            make_loss = masked_mse_loss
        loss_fn = make_loss(self._scaler, null_val)
        self._loss = loss_fn(preds=self._outputs, labels=self._labels)

        # ---- optimisation -----------------------------------------------
        if is_training:
            # NOTE(review): self._lr is not set in this method; presumably
            # the base class provides it.
            optimizer = tf.train.AdamOptimizer(self._lr)
            trainable = tf.trainable_variables()
            raw_grads = tf.gradients(self._loss, trainable)
            clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                      max_grad_norm)
            self._train_op = optimizer.apply_gradients(
                zip(clipped_grads, trainable),
                global_step=global_step,
                name='train_op')

        self._merged = tf.summary.merge_all()
Example #4
0
    def __init__(self, is_training, batch_size, scaler, adj_mx,
                 **model_kwargs):
        """Build the seq2seq graph: placeholders, encoder, decoder, outputs.

        Only the forward graph is created here; ``_loss``, ``_mae`` and
        ``_train_op`` are initialised to ``None`` and not assigned again in
        this method.

        Args:
            is_training: Selects what the decoder is fed at each step (see
                ``_loop_function`` below).
            batch_size: Static batch dimension used for every placeholder and
                reshape.
            scaler: Stored on ``self._scaler``.
            adj_mx: Adjacency matrix handed to every ``DCGRUCell``.
            **model_kwargs: Hyper-parameters read with ``.get``. ``rnn_units``
                and ``seq_len`` have no default; the other keys fall back to
                the defaults visible below.
        """
        # Scaler for data normalization.
        self._scaler = scaler

        # Train and loss
        self._loss = None
        self._mae = None
        self._train_op = None

        max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
        cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
        filter_type = model_kwargs.get('filter_type', 'laplacian')
        horizon = int(model_kwargs.get('horizon', 1))
        # NOTE(review): max_grad_norm is parsed but not used in this method.
        max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
        num_nodes = int(model_kwargs.get('num_nodes', 1))
        num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
        rnn_units = int(model_kwargs.get('rnn_units'))
        seq_len = int(model_kwargs.get('seq_len'))
        use_curriculum_learning = bool(
            model_kwargs.get('use_curriculum_learning', False))
        input_dim = int(model_kwargs.get('input_dim', 1))
        output_dim = int(model_kwargs.get('output_dim', 1))
        # Number of trailing label features that are not prediction targets.
        aux_dim = input_dim - output_dim

        # Input (batch_size, timesteps, num_sensor, input_dim)
        self._inputs = tf.placeholder(tf.float32,
                                      shape=(batch_size, seq_len, num_nodes,
                                             input_dim),
                                      name='inputs')
        # Labels: (batch_size, timesteps, num_sensor, input_dim), same format with input except the temporal dimension.
        self._labels = tf.placeholder(tf.float32,
                                      shape=(batch_size, horizon, num_nodes,
                                             input_dim),
                                      name='labels')
        # Note: seq_len is the input sequence length; horizon is the output sequence length.

        # GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))
        # All-zero tensor used as the first decoder input.
        GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * output_dim))

        cell = DCGRUCell(rnn_units,
                         adj_mx,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes,
                         filter_type=filter_type)
        cell_with_projection = DCGRUCell(rnn_units,
                                         adj_mx,
                                         max_diffusion_step=max_diffusion_step,
                                         num_nodes=num_nodes,
                                         num_proj=output_dim,
                                         filter_type=filter_type)
        # The same `cell` object fills every encoder layer and every decoder
        # layer except the last, which is the projecting cell.
        encoding_cells = [cell] * num_rnn_layers
        decoding_cells = [cell] * (num_rnn_layers - 1) + [cell_with_projection]
        encoding_cells = tf.contrib.rnn.MultiRNNCell(encoding_cells,
                                                     state_is_tuple=True)
        decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                     state_is_tuple=True)

        global_step = tf.train.get_or_create_global_step()
        # Outputs: (batch_size, timesteps, num_nodes, output_dim)
        with tf.variable_scope('DCRNN_SEQ'):
            inputs = tf.unstack(tf.reshape(
                self._inputs, (batch_size, seq_len, num_nodes * input_dim)),
                                axis=1)
            labels = tf.unstack(  # unstack splits inputs and labels along the time axis (12 steps in the original author's setup) into a list
                # Explanation of unstack: https://www.jianshu.com/p/25706575f8d4
                tf.reshape(self._labels[..., :output_dim],
                           (batch_size, horizon, num_nodes * output_dim)),
                axis=1)
            if aux_dim > 0:  # TODO: how is the case input_dim - output_dim > 0 handled?
                # NOTE(review): aux_info is only read inside the disabled
                # `if False` branch of _loop_function below.
                aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
                aux_info.insert(0, None)  # insert(index, object)
            # Prepend the GO symbol so that labels[i] lines up with decoder
            # step i.
            labels.insert(0, GO_SYMBOL)  # TODO: ?

            def _loop_function(prev, i):
                """Return the decoder input for step ``i``.

                ``prev`` is the decoder output of the previous step.
                """
                if is_training:
                    # Return either the model's prediction or the previous ground truth in training.
                    if use_curriculum_learning:  # [Scheduled sampling strategy]
                        c = tf.random_uniform((), minval=0, maxval=1.)
                        threshold = self._compute_sampling_threshold(
                            global_step, cl_decay_steps)
                        result = tf.cond(tf.less(c, threshold),
                                         lambda: labels[i], lambda: prev)
                    else:
                        result = labels[i]
                else:
                    # Return the prediction of the model in testing.
                    result = prev
                # Disabled on purpose by the leading `False`: this would
                # append the auxiliary label features to the decoder input.
                if False and aux_dim > 0:
                    result = tf.reshape(result,
                                        (batch_size, num_nodes, output_dim))
                    result = tf.concat([result, aux_info[i]], axis=-1)
                    result = tf.reshape(result,
                                        (batch_size, num_nodes * input_dim))
                return result

            # Two library calls carry out the seq2seq encoder-decoder pass.
            # legacy_seq2seq unrolls statically, i.e. every input sequence must have the specified length; the seq2seq library unrolls dynamically.
            # With either the static or the dynamic library, all sequences within one batch must have the same length.
            # Walkthrough of the legacy_seq2seq module: https://blog.csdn.net/u012871493/article/details/72350332
            _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells,
                                                     inputs,
                                                     dtype=tf.float32)
            outputs, final_state = legacy_seq2seq.rnn_decoder(
                labels,
                enc_state,
                decoding_cells,
                loop_function=_loop_function)

        # Project the output to output_dim.
        # The decoder ran horizon + 1 steps (GO symbol included); the last
        # step's output is dropped.
        outputs = tf.stack(outputs[:-1], axis=1)
        self._outputs = tf.reshape(
            outputs, (batch_size, horizon, num_nodes, output_dim),
            name='outputs')
        self._merged = tf.summary.merge_all()  # TODO: merge_all?
Example #5
0
    def __init__(self, args, is_training, batch_size, scaler, adj_mx,
                 **model_kwargs):
        """Build the forward graph of the extended DCRNN seq2seq model.

        Creates the ``inputs``/``labels`` placeholders, runs the project's
        ``rnn_encoder`` and ``rnn_decoder`` helpers and stores the reshaped
        predictions on ``self._outputs``. ``_loss``, ``_mae`` and
        ``_train_op`` are initialised to ``None`` and not assigned again in
        this method.

        Args:
            args: Passed through unchanged as the first argument of every
                ``DCGRUCell``.
            is_training: Selects what the decoder is fed at each step (see
                ``_loop_function`` below).
            batch_size: Static batch dimension used for every placeholder and
                reshape.
            scaler: Stored on ``self._scaler``.
            adj_mx: Adjacency matrix handed to every ``DCGRUCell``.
            **model_kwargs: Hyper-parameters read with ``.get``. ``rnn_units``
                and ``seq_len`` are required; keys read without a default
                below evaluate to ``None`` when absent.
        """
        # Scaler for data normalization.
        self._scaler = scaler

        # Train and loss
        self._loss = None
        self._mae = None
        self._train_op = None

        max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
        diffusion_with_graph_kernel = model_kwargs.get(
            'diffusion_with_graph_kernel', False)
        graph_kernel_mode = model_kwargs.get('graph_kernel_mode', 'local')
        cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
        filter_type = model_kwargs.get('filter_type', 'laplacian')
        horizon = int(model_kwargs.get('horizon', 1))
        # NOTE(review): max_grad_norm is parsed but not used in this method.
        max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
        num_nodes = int(model_kwargs.get('num_nodes', 1))
        num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
        rnn_units = int(model_kwargs.get('rnn_units'))
        seq_len = int(model_kwargs.get('seq_len'))
        use_curriculum_learning = bool(
            model_kwargs.get('use_curriculum_learning', False))
        input_dim = int(model_kwargs.get('input_dim', 1))
        output_dim = int(model_kwargs.get('output_dim', 1))
        # The options below have no default: a missing key yields None.
        pred_without_zero_input = model_kwargs.get('pred_without_zero_input')
        squeeze_and_excitation = model_kwargs.get('squeeze_and_excitation')
        se_activate = model_kwargs.get('se_activate')
        excitation_rate = model_kwargs.get('excitation_rate')
        r = model_kwargs.get('r')
        residuals = model_kwargs.get('residuals')
        cell_forward_mode = model_kwargs.get('cell_forward_mode')
        with_inputs_diffusion = model_kwargs.get('with_inputs_diffusion')
        with_inputs_channel_wise_attention = model_kwargs.get(
            'with_inputs_channel_wise_attention')
        attention_mode = model_kwargs.get('attention_mode')

        # Number of trailing label features that are not prediction targets.
        aux_dim = input_dim - output_dim

        # Input (batch_size, timesteps, num_sensor, input_dim)
        self._inputs = tf.placeholder(tf.float32,
                                      shape=(batch_size, seq_len, num_nodes,
                                             input_dim),
                                      name='inputs')
        # Labels: (batch_size, timesteps, num_sensor, input_dim), same format with input except the temporal dimension.
        self._labels = tf.placeholder(tf.float32,
                                      shape=(batch_size, horizon, num_nodes,
                                             input_dim),
                                      name='labels')

        # GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))
        # All-zero tensor used as the first decoder input (only when
        # pred_without_zero_input is falsy, see below).
        GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * output_dim))
        # Zero initial state handed to rnn_encoder.
        # NOTE(review): the list always has two entries, independent of
        # num_rnn_layers — confirm against what rnn_encoder expects.
        init_state = [
            tf.zeros(shape=(batch_size, num_nodes * rnn_units)),
            tf.zeros(shape=(batch_size, num_nodes * rnn_units))
        ]
        cell = DCGRUCell(args,
                         rnn_units,
                         adj_mx,
                         squeeze_and_excitation,
                         se_activate,
                         excitation_rate,
                         r,
                         diffusion_with_graph_kernel,
                         graph_kernel_mode,
                         cell_forward_mode,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes,
                         filter_type=filter_type,
                         use_gc_for_ru=True)
        cell_with_projection = DCGRUCell(args,
                                         rnn_units,
                                         adj_mx,
                                         squeeze_and_excitation,
                                         se_activate,
                                         excitation_rate,
                                         r,
                                         diffusion_with_graph_kernel,
                                         graph_kernel_mode,
                                         cell_forward_mode,
                                         max_diffusion_step=max_diffusion_step,
                                         num_nodes=num_nodes,
                                         num_proj=output_dim,
                                         filter_type=filter_type,
                                         use_gc_for_ru=True)
        # test_cells is the cell stack passed to rnn_encoder below; it takes
        # the place of the commented-out encoding_cells.
        test_cells = [cell] * num_rnn_layers
        #encoding_cells = [cell] * num_rnn_layers
        decoding_cells = [cell] * (num_rnn_layers - 1) + [cell_with_projection]
        #encoding_cells = tf.contrib.rnn.MultiRNNCell(encoding_cells, state_is_tuple=True)
        test_cells = tf.contrib.rnn.MultiRNNCell(test_cells,
                                                 state_is_tuple=True)
        decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                     state_is_tuple=True)

        global_step = tf.train.get_or_create_global_step()
        # Outputs: (batch_size, timesteps, num_nodes, output_dim)
        with tf.variable_scope('DCRNN_SEQ'):
            # One (batch_size, num_nodes * input_dim) tensor per input step.
            inputs = tf.unstack(tf.reshape(
                self._inputs, (batch_size, seq_len, num_nodes * input_dim)),
                                axis=1)
            # One (batch_size, num_nodes * output_dim) tensor per label step.
            labels = tf.unstack(
                tf.reshape(self._labels[..., :output_dim],
                           (batch_size, horizon, num_nodes * output_dim)),
                axis=1)
            if aux_dim > 0:
                # NOTE(review): aux_info is only read inside the disabled
                # `if False` branch of _loop_function below.
                aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
                aux_info.insert(0, None)
            if not pred_without_zero_input:
                # Prepend the GO symbol as the first decoder input.
                labels.insert(0, GO_SYMBOL)

            def _loop_function(prev, i):
                """Return the decoder input for step ``i``.

                ``prev`` is the decoder output of the previous step.
                """
                if is_training:
                    # Return either the model's prediction or the previous ground truth in training.
                    if use_curriculum_learning:
                        c = tf.random_uniform((), minval=0, maxval=1.)
                        threshold = self._compute_sampling_threshold(
                            global_step, cl_decay_steps)
                        result = tf.cond(tf.less(c, threshold),
                                         lambda: labels[i], lambda: prev)
                    else:
                        result = labels[i]
                else:
                    # Return the prediction of the model in testing.
                    result = prev
                # Disabled on purpose by the leading `False`: this would
                # append the auxiliary label features to the decoder input.
                if False and aux_dim > 0:
                    result = tf.reshape(result,
                                        (batch_size, num_nodes, output_dim))
                    result = tf.concat([result, aux_info[i]], axis=-1)
                    result = tf.reshape(result,
                                        (batch_size, num_nodes * input_dim))
                return result

            # rnn_encoder / rnn_decoder / get_first_output are project helpers
            # defined outside this method.
            output, state = rnn_encoder(inputs, init_state, test_cells, cell,
                                        with_inputs_diffusion,
                                        with_inputs_channel_wise_attention,
                                        attention_mode, r)
            if pred_without_zero_input:
                # No GO symbol was prepended: the first prediction is derived
                # from the encoder output and placed in front of the decoder
                # outputs.
                first_output = get_first_output(num_nodes, rnn_units,
                                                output_dim, output)
                outputs, final_state = rnn_decoder(
                    labels,
                    state,
                    decoding_cells,
                    cell,
                    with_inputs_diffusion,
                    with_inputs_channel_wise_attention,
                    attention_mode,
                    r,
                    loop_function=_loop_function,
                )
                outputs.insert(0, first_output)
            else:
                #_, enc_state = tf.contrib.rnn.static_rnn(encoding_cells, inputs, dtype=tf.float32)
                outputs, final_state = rnn_decoder(
                    labels,
                    state,
                    decoding_cells,
                    cell,
                    with_inputs_diffusion,
                    with_inputs_channel_wise_attention,
                    attention_mode,
                    r,
                    loop_function=_loop_function)

        # Project the output to output_dim.

        if residuals and not pred_without_zero_input:
            # Residual mode: each kept decoder output is added to a base
            # value. The base for the first step is the first output_dim
            # features of the last input step; the base for every later step
            # is the raw decoder output of the step before it.
            first_base_outputs = tf.reshape(
                tf.reshape(
                    inputs[-1],
                    (batch_size, num_nodes, input_dim))[..., :output_dim],
                (batch_size, num_nodes * output_dim))
            base_outputs = outputs[:-2]
            base_outputs.insert(0, first_base_outputs)
            base_outputs = tf.stack(base_outputs, axis=1)
            outputs = tf.stack(outputs[:-1], axis=1)
            outputs += base_outputs
        else:
            # Drop the last list entry and stack the rest along the time axis.
            outputs = tf.stack(outputs[:-1], axis=1)
        self._outputs = tf.reshape(
            outputs, (batch_size, horizon, num_nodes, output_dim),
            name='outputs')
        self._merged = tf.summary.merge_all()
Example #6
0
    def __init__(self, is_training, batch_size, scaler, adj_mx,
                 **model_kwargs):
        """Build the DCRNN encoder-decoder forward graph.

        Creates the ``inputs``/``labels`` placeholders, encodes the input
        steps with a multi-layer DCGRU stack, decodes with
        ``legacy_seq2seq.rnn_decoder`` and stores the reshaped predictions on
        ``self._outputs``. ``_loss``, ``_mae`` and ``_train_op`` are left as
        ``None`` by this method.

        Args:
            is_training: When true the decoder is fed the ground truth
                (mixed with its own predictions if curriculum learning is
                enabled); otherwise it is fed its previous output.
            batch_size: Static batch dimension used for every placeholder and
                reshape.
            scaler: Stored on ``self._scaler``.
            adj_mx: Adjacency matrix handed to every ``DCGRUCell``.
            **model_kwargs: Hyper-parameters read with ``.get``. ``rnn_units``
                and ``seq_len`` have no default; the other keys fall back to
                the defaults visible below.
        """
        # Scaler for data normalization.
        self._scaler = scaler

        # Filled in elsewhere; this method only builds the forward pass.
        self._loss = None
        self._mae = None
        self._train_op = None

        # ---- hyper-parameters -------------------------------------------
        max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
        cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
        filter_type = model_kwargs.get('filter_type', 'laplacian')
        horizon = int(model_kwargs.get('horizon', 1))
        num_nodes = int(model_kwargs.get('num_nodes', 1))
        num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
        rnn_units = int(model_kwargs.get('rnn_units'))
        seq_len = int(model_kwargs.get('seq_len'))
        use_curriculum_learning = bool(
            model_kwargs.get('use_curriculum_learning', False))
        input_dim = int(model_kwargs.get('input_dim', 1))
        output_dim = int(model_kwargs.get('output_dim', 1))

        flat_input_dim = num_nodes * input_dim
        flat_output_dim = num_nodes * output_dim

        # ---- placeholders -----------------------------------------------
        # inputs: (batch_size, seq_len, num_nodes, input_dim)
        input_shape = (batch_size, seq_len, num_nodes, input_dim)
        self._inputs = tf.placeholder(tf.float32,
                                      shape=input_shape,
                                      name='inputs')
        # labels: (batch_size, horizon, num_nodes, output_dim)
        label_shape = (batch_size, horizon, num_nodes, output_dim)
        self._labels = tf.placeholder(tf.float32,
                                      shape=label_shape,
                                      name='labels')
        # All-zero first decoder input.
        go_symbol = tf.zeros(shape=(batch_size, flat_output_dim))

        # ---- recurrent cells --------------------------------------------
        shared_cell_kwargs = dict(max_diffusion_step=max_diffusion_step,
                                  num_nodes=num_nodes,
                                  filter_type=filter_type)
        plain_cell = DCGRUCell(rnn_units, adj_mx, **shared_cell_kwargs)
        projecting_cell = DCGRUCell(rnn_units,
                                    adj_mx,
                                    num_proj=output_dim,
                                    **shared_cell_kwargs)
        encoder_layers = [plain_cell] * num_rnn_layers
        decoder_layers = [plain_cell] * (num_rnn_layers - 1)
        decoder_layers = decoder_layers + [projecting_cell]
        encoder = tf.contrib.rnn.MultiRNNCell(encoder_layers,
                                              state_is_tuple=True)
        decoder = tf.contrib.rnn.MultiRNNCell(decoder_layers,
                                              state_is_tuple=True)

        global_step = tf.train.get_or_create_global_step()

        # ---- encoder / decoder ------------------------------------------
        with tf.variable_scope('DCRNN_SEQ'):
            flat_inputs = tf.reshape(self._inputs,
                                     (batch_size, seq_len, flat_input_dim))
            encoder_inputs = tf.unstack(flat_inputs, axis=1)
            flat_labels = tf.reshape(self._labels[..., :output_dim],
                                     (batch_size, horizon, flat_output_dim))
            decoder_inputs = tf.unstack(flat_labels, axis=1)
            # With the GO symbol in front, decoder_inputs[i] is the input of
            # decoder step i.
            decoder_inputs.insert(0, go_symbol)

            def _loop_function(prev, i):
                """Return the decoder input for step ``i``."""
                if not is_training:
                    # Testing: feed the model's own prediction back in.
                    return prev
                if not use_curriculum_learning:
                    # Plain training: always feed the ground truth.
                    return decoder_inputs[i]
                # Scheduled sampling: randomly pick the ground truth or the
                # previous prediction.
                c = tf.random_uniform((), minval=0, maxval=1.)
                threshold = self._compute_sampling_threshold(
                    global_step, cl_decay_steps)
                return tf.cond(tf.less(c, threshold),
                               lambda: decoder_inputs[i], lambda: prev)

            _, enc_state = tf.contrib.rnn.static_rnn(encoder,
                                                     encoder_inputs,
                                                     dtype=tf.float32)
            step_outputs, _ = legacy_seq2seq.rnn_decoder(
                decoder_inputs,
                enc_state,
                decoder,
                loop_function=_loop_function)

        # The decoder ran horizon + 1 steps (GO symbol included); the last
        # step's output is dropped.
        stacked = tf.stack(step_outputs[:-1], axis=1)
        self._outputs = tf.reshape(stacked,
                                   (batch_size, horizon, num_nodes,
                                    output_dim),
                                   name='outputs')
        self._merged = tf.summary.merge_all()
Example #7
0
    def __init__(self, is_training, batch_size, scaler, adj_mx,
                 **model_kwargs):
        # Scaler for data normalization.
        self._scaler = scaler

        # Train and loss
        self._loss = None
        self._mae = None
        self._train_op = None

        max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
        cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 2000))
        filter_type = model_kwargs.get('filter_type', 'laplacian')
        horizon = int(model_kwargs.get('horizon', 1))
        max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
        num_nodes = int(model_kwargs.get('num_nodes', 1))
        num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
        rnn_units = int(model_kwargs.get('rnn_units'))
        seq_len = int(model_kwargs.get('seq_len'))
        use_curriculum_learning = bool(
            model_kwargs.get('use_curriculum_learning', False))
        input_dim = int(model_kwargs.get('input_dim', 1))
        output_dim = 1

        self._inputs = tf.placeholder(tf.float32,
                                      shape=(batch_size, seq_len, num_nodes,
                                             input_dim),
                                      name='inputs')
        self._labels = tf.placeholder(tf.float32,
                                      shape=(batch_size, horizon, num_nodes,
                                             input_dim),
                                      name='labels')
        self.train_inputs = tf.concat((self._inputs, self._labels), axis=1)

        self._targets = tf.slice(
            self.train_inputs, [0, 0, 0, 0],
            [batch_size, horizon + seq_len - 1, num_nodes, 1],
            name='targets')

        cell_1st_layer = DCGRUCell(rnn_units,
                                   adj_mx,
                                   first_layer=True,
                                   max_diffusion_step=max_diffusion_step,
                                   num_nodes=num_nodes,
                                   filter_type=filter_type)

        cell = DCGRUCell(rnn_units,
                         adj_mx,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes,
                         filter_type=filter_type)
        # We temporarily change the num_proj from output_dim to input_dim.
        cell_with_projection = DCGRUCell(rnn_units,
                                         adj_mx,
                                         max_diffusion_step=max_diffusion_step,
                                         num_nodes=num_nodes,
                                         num_proj=output_dim,
                                         filter_type=filter_type)
        decoding_cells = [cell_1st_layer] + [cell] * (num_rnn_layers - 2) + [
            cell_with_projection
        ]
        decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                     state_is_tuple=True)
        global_step = tf.train.get_or_create_global_step()

        with tf.variable_scope('DCRNN_SEQ'):
            train_inputs = tf.unstack(self.train_inputs, axis=1)

            # We need to tear the train_inputs up.
            def _loop_function(prev, i):
                # To do: the probability of using the previous is increasing when going towards the
                # end of the sequence.
                train_input = train_inputs[i]
                if len(train_input.shape) == 3:
                    time_input = tf.slice(
                        train_input, [0, 0, 1],
                        [train_input.shape[0], train_input.shape[1], 1])

                if is_training:
                    if use_curriculum_learning:
                        c = tf.random_uniform((), minval=0.0, maxval=1.0)
                        threshold = self._compute_sampling_threshold(
                            global_step, cl_decay_steps)
                        if i < seq_len:
                            result = train_input
                        else:
                            # result = tf.cond(tf.less(c, threshold), lambda: train_inputs[i], lambda: tf.concat([prev,
                            # day_input, time_input], axis=-1))
                            result = tf.cond(
                                tf.less(c, threshold), lambda: train_inputs[i],
                                lambda: tf.concat([prev, time_input], axis=-1))
                    else:
                        result = train_inputs[i]
                else:
                    if i < seq_len:
                        result = train_inputs[i]
                    else:
                        # result = tf.concat([prev, day_input, time_input], axis=-1)
                        result = tf.concat([prev, time_input], axis=-1)
                return result

            initial_state = [
                tf.zeros(shape=(64, 13248)) for _ in range(num_rnn_layers)
            ]
            state = initial_state
            outputs = []
            prev = None

            for i, inp in enumerate(train_inputs):

                with tf.variable_scope("loop_function", reuse=True):
                    if prev is not None:
                        inp = _loop_function(prev, i)
                if i > 0:
                    tf.get_variable_scope().reuse_variables()

                output, state = decoding_cells(inp, state)
                output = tf.reshape(output,
                                    (batch_size, num_nodes, output_dim))
                outputs.append(output)
                prev = output

        outputs_dim = 1
        outputs = tf.stack(outputs[:-1], axis=1)
        self._outputs = tf.reshape(
            outputs,
            (batch_size, horizon + seq_len - 1, num_nodes, outputs_dim),
            name='outputs')
        self._merged = tf.summary.merge_all()
Example #8
0
    def __init__(self, is_training, batch_size, scaler, adj_matrix_file,
                 **model_kwargs):
        """Build the TF1 encoder/decoder (seq2seq) graph on top of DCGRU cells.

        The encoder consumes ``seq_len`` input steps; its final state seeds a
        decoder that emits ``horizon`` steps. The result is exposed as
        ``self._outputs`` with shape
        ``(batch_size, horizon, num_nodes, output_dim)``.

        Args:
            is_training: When truthy, the decoder is fed the ground-truth
                label of the previous step (or, with curriculum learning, a
                random choice between that label and its own previous
                prediction). When falsy it is always fed its own previous
                prediction.
            batch_size: Static batch dimension used for both placeholders.
            scaler: Stored on ``self._scaler``; not otherwise used here.
            adj_matrix_file: Forwarded to ``load_graph_data``; the third
                element of its return value is used as the adjacency matrix.
            **model_kwargs: Model hyper-parameters. Keys read here, with the
                default used when a key is absent:

                * ``max_diffusion_step`` (0)
                * ``cl_decay_steps`` (1000)
                * ``filter_type`` ('laplacian')
                * ``network`` ('gconv'); ``graphEmbedFile`` is only read when
                  this is 'fc'
                * ``horizon`` (1)
                * ``num_nodes`` (1)
                * ``num_rnn_layers`` (1)
                * ``rnn_units`` (required)
                * ``seq_len`` (required)
                * ``use_curriculum_learning`` (False)
                * ``input_dim`` (1)
                * ``output_dim`` (1)

                Any other key is ignored by this constructor.

        Raises:
            TypeError: If ``rnn_units`` or ``seq_len`` is missing, because
                ``int(None)`` is evaluated.
        """
        # Scaler for data normalization.
        self._scaler = scaler

        # Train and loss attributes start out unset.
        self._loss = None
        self._mae = None
        self._train_op = None

        max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 0))
        cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
        filter_type = model_kwargs.get('filter_type', 'laplacian')

        network_type = model_kwargs.get('network', 'gconv')  # fc/gconv

        horizon = int(model_kwargs.get('horizon', 1))
        num_nodes = int(model_kwargs.get('num_nodes', 1))
        num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
        rnn_units = int(model_kwargs.get('rnn_units'))
        seq_len = int(model_kwargs.get('seq_len'))
        use_curriculum_learning = bool(
            model_kwargs.get('use_curriculum_learning', False))
        input_dim = int(model_kwargs.get('input_dim', 1))
        output_dim = int(model_kwargs.get('output_dim', 1))

        _, _, adj_mx = load_graph_data(adj_matrix_file)

        # The graph-embedding file is only looked up for the 'fc' network.
        graph_embed_file = None
        if network_type == 'fc':
            graph_embed_file = model_kwargs.get('graphEmbedFile')

        # Inputs: (batch_size, seq_len, num_nodes, input_dim).
        self._inputs = tf.placeholder(tf.float32,
                                      shape=(batch_size, seq_len, num_nodes,
                                             input_dim),
                                      name='inputs')
        # Labels: same layout as the inputs except that the temporal
        # dimension is `horizon`.
        self._labels = tf.placeholder(tf.float32,
                                      shape=(batch_size, horizon, num_nodes,
                                             input_dim),
                                      name='labels')

        # All-zero first decoder input; sized with output_dim because the
        # decoder only ever sees the first output_dim label channels.
        go_symbol = tf.zeros(shape=(batch_size, num_nodes * output_dim))

        cell = DCGRUCell(rnn_units,
                         adj_mx,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=num_nodes,
                         network_type=network_type,
                         graphEmbedFile=graph_embed_file,
                         filter_type=filter_type)
        # Same configuration plus a projection down to output_dim; used as
        # the last decoder layer.
        cell_with_projection = DCGRUCell(rnn_units,
                                         adj_mx,
                                         max_diffusion_step=max_diffusion_step,
                                         num_nodes=num_nodes,
                                         network_type=network_type,
                                         graphEmbedFile=graph_embed_file,
                                         num_proj=output_dim,
                                         filter_type=filter_type)
        encoding_cells = [cell] * num_rnn_layers
        decoding_cells = [cell] * (num_rnn_layers - 1) + [cell_with_projection]
        encoding_cells = tf.contrib.rnn.MultiRNNCell(encoding_cells,
                                                     state_is_tuple=True)
        decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells,
                                                     state_is_tuple=True)

        global_step = tf.train.get_or_create_global_step()
        # Outputs: (batch_size, timesteps, num_nodes, output_dim)
        with tf.variable_scope('DCRNN_SEQ'):
            # Flatten the node/feature axes and split along time so that each
            # list element is the 2-D input of one RNN step.
            inputs = tf.unstack(tf.reshape(
                self._inputs, (batch_size, seq_len, num_nodes * input_dim)),
                                axis=1)
            # Only the first output_dim channels of the labels are fed to the
            # decoder; any remaining label channels are not used here.
            labels = tf.unstack(
                tf.reshape(self._labels[..., :output_dim],
                           (batch_size, horizon, num_nodes * output_dim)),
                axis=1)
            labels.insert(0, go_symbol)

            def _loop_function(prev, i):
                """Pick the decoder input for step ``i`` given output ``prev``."""
                if not is_training:
                    # Testing: always feed back the model's own prediction.
                    return prev
                if not use_curriculum_learning:
                    # Plain teacher forcing.
                    return labels[i]
                # Scheduled sampling: use the ground truth when a uniform
                # draw falls below the current threshold, else the prediction.
                c = tf.random_uniform((), minval=0, maxval=1.)
                threshold = self._compute_sampling_threshold(
                    global_step, cl_decay_steps)
                return tf.cond(tf.less(c, threshold),
                               lambda: labels[i], lambda: prev)

            # Encode the input sequence; only the final state is kept.
            _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells,
                                                     inputs,
                                                     dtype=tf.float32)
            # Decode starting from the encoder state. The decoder's final
            # state is not needed.
            outputs, _ = legacy_seq2seq.rnn_decoder(
                labels,
                enc_state,
                decoding_cells,
                loop_function=_loop_function)

        # `labels` holds horizon + 1 entries (GO symbol + horizon labels), so
        # the final decoder output is dropped to leave exactly `horizon`
        # steps, matching the reshape below.
        outputs = tf.stack(outputs[:-1], axis=1)
        self._outputs = tf.reshape(
            outputs, (batch_size, horizon, num_nodes, output_dim),
            name='outputs')
        self._merged = tf.summary.merge_all()
Example #9
0
    def __init__(self,
                 num_station,
                 input_steps,
                 num_layers=2,
                 num_units=64,
                 max_diffusion_step=2,
                 dy_adj=1,
                 dy_filter=0,
                 f_adj_mx=None,
                 trained_adj_mx=False,
                 filter_type='dual_random_walk',
                 batch_size=32):
        self.num_station = num_station
        self.input_steps = input_steps
        self.num_units = num_units
        self.max_diffusion_step = max_diffusion_step

        self.dy_adj = dy_adj
        self.dy_filter = dy_filter
        self.f_adj_mx = f_adj_mx
        self.filter_type = filter_type

        self.batch_size = batch_size

        self.weight_initializer = tf.contrib.layers.xavier_initializer()
        self.const_initializer = tf.constant_initializer()

        if trained_adj_mx:
            with tf.variable_scope('trained_adj_mx', reuse=tf.AUTO_REUSE):
                adj_mx = tf.get_variable('adj_mx',
                                         [self.num_station, self.num_station],
                                         dtype=tf.float32,
                                         initializer=self.weight_initializer)
        else:
            adj_mx = self.f_adj_mx
        #
        first_cell = DCGRUCell(self.num_units,
                               adj_mx=adj_mx,
                               max_diffusion_step=self.max_diffusion_step,
                               num_nodes=self.num_station,
                               num_proj=None,
                               input_dim=2,
                               dy_adj=self.dy_adj,
                               dy_filter=self.dy_filter,
                               output_dy_adj=self.dy_adj,
                               filter_type=self.filter_type)
        cell = DCGRUCell(self.num_units,
                         adj_mx=adj_mx,
                         max_diffusion_step=max_diffusion_step,
                         num_nodes=self.num_station,
                         num_proj=None,
                         input_dim=self.num_units,
                         dy_adj=self.dy_adj,
                         dy_filter=0,
                         output_dy_adj=self.dy_adj,
                         filter_type=self.filter_type)
        cell_with_projection = DCGRUCell(self.num_units,
                                         adj_mx=adj_mx,
                                         max_diffusion_step=max_diffusion_step,
                                         num_nodes=self.num_station,
                                         num_proj=2,
                                         input_dim=self.num_units,
                                         dy_adj=self.dy_adj,
                                         dy_filter=0,
                                         output_dy_adj=False,
                                         filter_type=self.filter_type)
        if num_layers > 2:
            cells = [first_cell
                     ] + [cell] * (num_layers - 2) + [cell_with_projection]
        else:
            cells = [first_cell, cell_with_projection]

        self.cells = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
        #
        self.x = tf.placeholder(
            tf.float32,
            [self.batch_size, self.input_steps, self.num_station, 2])
        self.f = tf.placeholder(tf.float32, [
            self.batch_size, self.input_steps, self.num_station,
            self.num_station
        ])
        self.y = tf.placeholder(
            tf.float32,
            [self.batch_size, self.input_steps, self.num_station, 2])