Example #1
  def testBatch(self):
    # Combine these two states into a single batch. Request a batch_size of 3,
    # which means that one additional batch entry of all zeros will be created.
    batched_states = state_util.batch(self._unbatched_states, batch_size=3)

    self._assert_sructures_equal(self._batched_states, batched_states)
Example #2
    def testBatch(self):
        # Combine these two states into a single batch. Request a batch_size of 3,
        # which means that one additional batch entry of all zeros will be created.
        batched_states = state_util.batch(self._unbatched_states, batch_size=3)

        self._assert_sructures_equal(self._batched_states, batched_states)
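
Both variants above exercise the zero-padding path of `state_util.batch`: when fewer states are supplied than the requested `batch_size`, the remaining batch entries are filled with zeros. The snippet below is a minimal sketch of that behavior for states that are nested tuples of numpy arrays, written only to illustrate the expected semantics; it is not the library's implementation, and the helper name `batch_with_padding` is hypothetical.

import numpy as np


def batch_with_padding(states, batch_size):
  """Sketch: stack per-sequence states and zero-pad up to batch_size entries."""
  def stack_leaves(leaves):
    # Stack matching arrays from each state and append all-zero entries until
    # the leading (batch) dimension reaches batch_size.
    pad = [np.zeros_like(leaves[0])] * (batch_size - len(leaves))
    return np.stack(list(leaves) + pad)

  def recurse(parts):
    if isinstance(parts[0], tuple):
      # Recurse into each position of the nested tuple structure.
      return tuple(recurse(children) for children in zip(*parts))
    return stack_leaves(parts)

  return recurse(states)

With two per-sequence states and batch_size=3, every array in the result gets a leading dimension of 3 whose last entry is all zeros, which is the behavior the test comment describes.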
Example #3
    def testBatch_Single(self):
        batched_state = state_util.batch(self._unbatched_states[0:1],
                                         batch_size=1)
        expected_batched_state = (np.array([[[1, 2, 3],
                                             [4, 5, 6]]]), (np.array([[7, 8]]),
                                                            np.array([[9]])),
                                  np.array([[[10], [11]]]))

        self._assert_sructures_equal(expected_batched_state, batched_state)
Example #4
  def testBatch_Single(self):
    batched_state = state_util.batch(self._unbatched_states[0:1], batch_size=1)
    expected_batched_state = (
        np.array([[[1, 2, 3], [4, 5, 6]]]),
        (np.array([[7, 8]]), np.array([[9]])),
        np.array([[[10], [11]]])
    )

    self._assert_sructures_equal(expected_batched_state, batched_state)
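
Reading the expected output of this single-state test backwards makes the fixture explicit: `state_util.batch` adds a leading batch axis to every array in the nested state structure, so for the assertion above to hold, the first unbatched state would have to be the structure below. This is inferred from the expectation, not copied from the test's `setUp`.

import numpy as np

# Implied value of self._unbatched_states[0]: the same nested structure as the
# expected batched state, minus the leading batch dimension of size 1.
unbatched_state = (
    np.array([[1, 2, 3],
              [4, 5, 6]]),     # shape (2, 3), batched to (1, 2, 3)
    (np.array([7, 8]),         # shape (2,),   batched to (1, 2)
     np.array([9])),           # shape (1,),   batched to (1, 1)
    np.array([[10], [11]]),    # shape (2, 1), batched to (1, 2, 1)
)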
Example #5
  def _generate_step(self, event_sequences, inputs, initial_states,
                     temperature):
    """Extends a list of event sequences by a single step each.

    This method modifies the event sequences in place.

    Args:
      event_sequences: A list of event sequence objects, which are extended by
          this method.
      inputs: A Python list of model inputs, with length equal to the number of
          event sequences.
      initial_states: A collection of structures for the initial RNN states,
          one for each event sequence.
      temperature: The softmax temperature.

    Returns:
      final_states: The final RNN states, a list the same size as
          `initial_states`.
      loglik: The log-likelihood of the chosen softmax value for each event
          sequence, a 1-D numpy array with one entry per event sequence (i.e.
          of length `len(event_sequences)`). If `inputs` is a full-length
          inputs batch, the log-likelihood of each entire sequence up to and
          including the generated step will be computed and returned.
    """
    # Split the sequences to extend into batches matching the model batch size.
    batch_size = self._batch_size()
    num_seqs = len(event_sequences)
    num_batches = int(np.ceil(num_seqs / float(batch_size)))

    final_states = []
    loglik = np.empty(num_seqs)

    # Add padding to fill the final batch.
    pad_amt = -len(event_sequences) % batch_size
    padded_event_sequences = event_sequences + [
        copy.deepcopy(event_sequences[-1]) for _ in range(pad_amt)]
    padded_inputs = inputs + [inputs[-1]] * pad_amt
    padded_initial_states = initial_states + [initial_states[-1]] * pad_amt

    for b in range(num_batches):
      i, j = b * batch_size, (b + 1) * batch_size
      pad_amt = max(0, j - num_seqs)
      # Generate a single step for one batch of event sequences.
      batch_final_state, batch_loglik = self._generate_step_for_batch(
          padded_event_sequences[i:j],
          padded_inputs[i:j],
          state_util.batch(padded_initial_states[i:j], batch_size),
          temperature)
      final_states += state_util.unbatch(
          batch_final_state, batch_size)[:j - i - pad_amt]
      loglik[i:j - pad_amt] = batch_loglik[:j - i - pad_amt]

    return final_states, loglik
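
The padding amount relies on Python's floored modulo: `-len(event_sequences) % batch_size` is exactly the number of dummy entries needed to round the number of sequences up to a multiple of the batch size. A quick worked check in plain Python:

batch_size = 3
for num_seqs in (1, 2, 3, 4, 5, 6):
  pad_amt = -num_seqs % batch_size
  # num_seqs: 1 -> pad 2, 2 -> 1, 3 -> 0, 4 -> 2, 5 -> 1, 6 -> 0; the padded
  # count is always rounded up to the next multiple of batch_size.
  assert (num_seqs + pad_amt) % batch_size == 0 and pad_amt < batch_size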
Example #6
    def _generate_step(self,
                       event_sequences,
                       model_states,
                       logliks,
                       temperature,
                       extend_control_events_callback=None,
                       modify_events_callback=None):
        """Extends a list of event sequences by a single step each.

        This method modifies the event sequences in place. It also returns the
        modified event sequences and updated model states and log-likelihoods.

        Args:
          event_sequences: A list of event sequence objects, which are extended by
              this method.
          model_states: A list of model states, each of which contains model inputs
              and initial RNN states.
          logliks: A list containing the current log-likelihood for each event
              sequence.
          temperature: The softmax temperature.
          extend_control_events_callback: A function that takes three arguments: a
              current control event sequence, a current generated event sequence,
              and the control state. The function should a) extend the control event
              sequence to be one longer than the generated event sequence (or do
              nothing if it is already at least this long), and b) return the
              resulting control state.
          modify_events_callback: An optional callback for modifying the event list.
              Can be used to inject events rather than having them generated. If not
              None, will be called with 3 arguments after every event: the current
              EventSequenceEncoderDecoder, a list of current EventSequences, and a
              list of current encoded event inputs.

        Returns:
          event_sequences: A list of extended event sequences. These are modified in
              place but also returned.
          final_states: A list of resulting model states, containing model inputs
              for the next step along with RNN states for each event sequence.
          logliks: A list containing the updated log-likelihood for each event
              sequence.
        """
        # Split the sequences to extend into batches matching the model batch size.
        batch_size = self._batch_size()
        num_seqs = len(event_sequences)
        num_batches = int(np.ceil(num_seqs / float(batch_size)))

        # Extract inputs and RNN states from the model states.
        inputs = [model_state.inputs for model_state in model_states]
        initial_states = [
            model_state.rnn_state for model_state in model_states
        ]

        # Also extract control sequences and states.
        control_sequences = [
            model_state.control_events for model_state in model_states
        ]
        control_states = [
            model_state.control_state for model_state in model_states
        ]

        final_states = []
        logliks = np.array(logliks, dtype=np.float32)

        # Add padding to fill the final batch.
        pad_amt = -len(event_sequences) % batch_size
        padded_event_sequences = event_sequences + [
            copy.deepcopy(event_sequences[-1]) for _ in range(pad_amt)
        ]
        padded_inputs = inputs + [inputs[-1]] * pad_amt
        padded_initial_states = initial_states + [initial_states[-1]] * pad_amt

        for b in range(num_batches):
            i, j = b * batch_size, (b + 1) * batch_size
            pad_amt = max(0, j - num_seqs)
            # Generate a single step for one batch of event sequences.
            batch_final_state, batch_loglik = self._generate_step_for_batch(
                padded_event_sequences[i:j], padded_inputs[i:j],
                state_util.batch(padded_initial_states[i:j], batch_size),
                temperature)
            final_states += state_util.unbatch(batch_final_state,
                                               batch_size)[:j - i - pad_amt]
            logliks[i:j - pad_amt] += batch_loglik[:j - i - pad_amt]

        # Construct inputs for next step.
        if extend_control_events_callback is not None:
            # We are conditioning on control sequences.
            for idx in range(len(control_sequences)):
                # Extend each control sequence to ensure that it is longer than the
                # corresponding event sequence.
                control_states[idx] = extend_control_events_callback(
                    control_sequences[idx], event_sequences[idx],
                    control_states[idx])
            next_inputs = self._config.encoder_decoder.get_inputs_batch(
                control_sequences, event_sequences)
        else:
            next_inputs = self._config.encoder_decoder.get_inputs_batch(
                event_sequences)

        if modify_events_callback:
            # Modify event sequences and inputs for next step.
            modify_events_callback(self._config.encoder_decoder,
                                   event_sequences, next_inputs)

        model_states = [
            ModelState(inputs=inputs,
                       rnn_state=final_state,
                       control_events=control_events,
                       control_state=control_state)
            for inputs, final_state, control_events, control_state in zip(
                next_inputs, final_states, control_sequences, control_states)
        ]

        return event_sequences, model_states, logliks
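
The docstring spells out the contract for `extend_control_events_callback`, but none of the examples define one. The sketch below is one hypothetical callback that satisfies that contract by repeating the last control event; it assumes `control_events` is a non-empty sequence supporting `len` and `append`, and it is not a callback taken from the library.

def repeat_last_control(control_events, generated_events, control_state):
  """Hypothetical extend_control_events_callback.

  Extends control_events until it is one event longer than generated_events
  (doing nothing if it is already at least that long), then returns the
  control state unchanged, as the contract above requires.
  """
  while len(control_events) <= len(generated_events):
    control_events.append(control_events[-1])  # repeat the last control event
  return control_state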
Example #7
  def _generate_step(self, event_sequences, model_states, logliks, temperature,
                     extend_control_events_callback=None,
                     modify_events_callback=None):
    """Extends a list of event sequences by a single step each.

    This method modifies the event sequences in place. It also returns the
    modified event sequences and updated model states and log-likelihoods.

    Args:
      event_sequences: A list of event sequence objects, which are extended by
          this method.
      model_states: A list of model states, each of which contains model inputs
          and initial RNN states.
      logliks: A list containing the current log-likelihood for each event
          sequence.
      temperature: The softmax temperature.
      extend_control_events_callback: A function that takes three arguments: a
          current control event sequence, a current generated event sequence,
          and the control state. The function should a) extend the control event
          sequence to be one longer than the generated event sequence (or do
          nothing if it is already at least this long), and b) return the
          resulting control state.
      modify_events_callback: An optional callback for modifying the event list.
          Can be used to inject events rather than having them generated. If not
          None, will be called with 3 arguments after every event: the current
          EventSequenceEncoderDecoder, a list of current EventSequences, and a
          list of current encoded event inputs.

    Returns:
      event_sequences: A list of extended event sequences. These are modified in
          place but also returned.
      final_states: A list of resulting model states, containing model inputs
          for the next step along with RNN states for each event sequence.
      logliks: A list containing the updated log-likelihood for each event
          sequence.
    """
    # Split the sequences to extend into batches matching the model batch size.
    batch_size = self._batch_size()
    num_seqs = len(event_sequences)
    num_batches = int(np.ceil(num_seqs / float(batch_size)))

    # Extract inputs and RNN states from the model states.
    inputs = [model_state.inputs for model_state in model_states]
    initial_states = [model_state.rnn_state for model_state in model_states]

    # Also extract control sequences and states.
    control_sequences = [
        model_state.control_events for model_state in model_states]
    control_states = [
        model_state.control_state for model_state in model_states]

    final_states = []
    logliks = np.array(logliks, dtype=np.float32)

    # Add padding to fill the final batch.
    pad_amt = -len(event_sequences) % batch_size
    padded_event_sequences = event_sequences + [
        copy.deepcopy(event_sequences[-1]) for _ in range(pad_amt)]
    padded_inputs = inputs + [inputs[-1]] * pad_amt
    padded_initial_states = initial_states + [initial_states[-1]] * pad_amt

    for b in range(num_batches):
      i, j = b * batch_size, (b + 1) * batch_size
      pad_amt = max(0, j - num_seqs)
      # Generate a single step for one batch of event sequences.
      batch_final_state, batch_loglik = self._generate_step_for_batch(
          padded_event_sequences[i:j],
          padded_inputs[i:j],
          state_util.batch(padded_initial_states[i:j], batch_size),
          temperature)
      final_states += state_util.unbatch(
          batch_final_state, batch_size)[:j - i - pad_amt]
      logliks[i:j - pad_amt] += batch_loglik[:j - i - pad_amt]

    # Construct inputs for next step.
    if extend_control_events_callback is not None:
      # We are conditioning on control sequences.
      for idx in range(len(control_sequences)):
        # Extend each control sequence to ensure that it is longer than the
        # corresponding event sequence.
        control_states[idx] = extend_control_events_callback(
            control_sequences[idx], event_sequences[idx], control_states[idx])
      next_inputs = self._config.encoder_decoder.get_inputs_batch(
          control_sequences, event_sequences)
    else:
      next_inputs = self._config.encoder_decoder.get_inputs_batch(
          event_sequences)

    if modify_events_callback:
      # Modify event sequences and inputs for next step.
      modify_events_callback(
          self._config.encoder_decoder, event_sequences, next_inputs)

    model_states = [ModelState(inputs=inputs, rnn_state=final_state,
                               control_events=control_events,
                               control_state=control_state)
                    for inputs, final_state, control_events, control_state
                    in zip(next_inputs, final_states,
                           control_sequences, control_states)]

    return event_sequences, model_states, logliks
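
The `ModelState` record that this method consumes and rebuilds is never defined in these excerpts. From the attribute accesses (`inputs`, `rnn_state`, `control_events`, `control_state`) it behaves like a simple named record, so a definition of roughly the following shape would be consistent with the code above; this is an inference from usage, not the definition in the source tree.

import collections

# Inferred shape of ModelState, based only on how its fields are read and
# written in the examples above.
ModelState = collections.namedtuple(
    'ModelState', ['inputs', 'rnn_state', 'control_events', 'control_state'])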