Code example #1
File: uisrnn.py  Project: vickianand/uis-rnn
    def _update_beam_state(self, beam_state, look_ahead_seq, cluster_seq):
        """Update a beam state given a look ahead sequence and known cluster
    assignments.

    Args:
      beam_state: A BeamState object.
      look_ahead_seq: Look ahead sequence, size: look_ahead*D.
        look_ahead: number of step to look ahead in the beam search.
        D: observation dimension
      cluster_seq: Cluster assignment sequence for look_ahead_seq.

    Returns:
      new_beam_state: An updated BeamState object.
    """

        loss = 0
        new_beam_state = BeamState(beam_state)
        for sub_idx, cluster in enumerate(cluster_seq):
            if cluster > len(new_beam_state.mean_set):  # invalid trace
                new_beam_state.neg_likelihood = float('inf')
                break
            elif cluster < len(new_beam_state.mean_set):  # existing cluster
                last_cluster = new_beam_state.trace[-1]
                loss = loss_func.weighted_mse_loss(
                    input_tensor=torch.squeeze(
                        new_beam_state.mean_set[cluster]),
                    target_tensor=look_ahead_seq[sub_idx, :],
                    weight=1 / (2 * self.sigma2)).cpu().detach().numpy()
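                # Sequence prior: staying with the same speaker costs
                # -log(1 - transition_bias); switching to another existing
                # cluster follows the distance-dependent CRP, weighted by
                # that cluster's block count.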
                if cluster == last_cluster:
                    loss -= np.log(1 - self.transition_bias)
                else:
                    loss -= np.log(self.transition_bias) + np.log(
                        new_beam_state.block_counts[cluster]) - np.log(
                            sum(new_beam_state.block_counts) + self.crp_alpha)
                # update new mean and new hidden
                mean, hidden = self.rnn_model(
                    look_ahead_seq[sub_idx, :].unsqueeze(0).unsqueeze(0),
                    new_beam_state.hidden_set[cluster])
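                # Incremental running average of RNN outputs for this
                # cluster: new_mean = (old_mean * (n - 1) + mean) / n,
                # with n = (trace == cluster).sum().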
                new_beam_state.mean_set[cluster] = (
                    new_beam_state.mean_set[cluster] *
                    ((np.array(new_beam_state.trace) == cluster).sum() -
                     1).astype(float) + mean.clone()) / (np.array(
                         new_beam_state.trace) == cluster).sum().astype(
                             float)  # use mean to predict
                new_beam_state.hidden_set[cluster] = hidden.clone()
                if cluster != last_cluster:
                    new_beam_state.block_counts[cluster] += 1
                new_beam_state.trace.append(cluster)
            else:  # new cluster
                init_input = autograd.Variable(
                    torch.zeros(
                        self.observation_dim)).unsqueeze(0).unsqueeze(0).to(
                            self.device)
                mean, hidden = self.rnn_model(init_input, self.rnn_init_hidden)
                loss = loss_func.weighted_mse_loss(
                    input_tensor=torch.squeeze(mean),
                    target_tensor=look_ahead_seq[sub_idx, :],
                    weight=1 / (2 * self.sigma2)).cpu().detach().numpy()
                loss -= np.log(self.transition_bias) + np.log(
                    self.crp_alpha) - np.log(
                        sum(new_beam_state.block_counts) + self.crp_alpha)
                # update new mean and new hidden
                mean, hidden = self.rnn_model(
                    look_ahead_seq[sub_idx, :].unsqueeze(0).unsqueeze(0),
                    hidden)
                new_beam_state.append(mean, hidden, cluster)
            new_beam_state.neg_likelihood += loss
        return new_beam_state
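
For reference, here is a minimal standalone sketch of the sequence prior that the two loss branches above subtract in log space. The helper name `crp_log_prob` is hypothetical; `transition_bias`, `block_counts`, and `crp_alpha` mirror the attributes used in the method.

```python
import numpy as np

def crp_log_prob(cluster, last_cluster, block_counts,
                 transition_bias, crp_alpha):
    """Log-probability of the next cluster assignment, as encoded by the
    loss terms in _update_beam_state (hypothetical helper, illustration only)."""
    if cluster == last_cluster:
        # Stay with the current speaker.
        return np.log(1 - transition_bias)
    denominator = sum(block_counts) + crp_alpha
    if cluster < len(block_counts):
        # Switch to an existing cluster, weighted by its block count.
        return (np.log(transition_bias) + np.log(block_counts[cluster])
                - np.log(denominator))
    # Open a new cluster, with pseudo-count crp_alpha.
    return np.log(transition_bias) + np.log(crp_alpha) - np.log(denominator)

# Two existing clusters with block counts [3, 1], transition_bias=0.2,
# crp_alpha=1.0.
print(np.exp(crp_log_prob(0, 0, [3, 1], 0.2, 1.0)))  # stay:   0.8
print(np.exp(crp_log_prob(1, 0, [3, 1], 0.2, 1.0)))  # switch: 0.2 * 1/5
print(np.exp(crp_log_prob(2, 0, [3, 1], 0.2, 1.0)))  # new:    0.2 * 1/5
```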
Code example #2
File: uisrnn.py  Project: vickianand/uis-rnn
    def fit_concatenated(self, train_sequence, train_cluster_id, args):
        """Fit UISRNN model to concatenated sequence and cluster_id.

    Args:
      train_sequence: the training observation sequence, which is a
        2-dim numpy array of real numbers, of size `N * D`.

        - `N`: summation of lengths of all utterances.
        - `D`: observation dimension.

        For example,
      ```
      train_sequence =
      [[1.2 3.0 -4.1 6.0]    --> an entry of speaker #0 from utterance 'iaaa'
       [0.8 -1.1 0.4 0.5]    --> an entry of speaker #1 from utterance 'iaaa'
       [-0.2 1.0 3.8 5.7]    --> an entry of speaker #0 from utterance 'iaaa'
       [3.8 -0.1 1.5 2.3]    --> an entry of speaker #0 from utterance 'ibbb'
       [1.2 1.4 3.6 -2.7]]   --> an entry of speaker #0 from utterance 'ibbb'
      ```
        Here `N=5`, `D=4`.

        We concatenate all training utterances into this single sequence.
      train_cluster_id: the speaker id sequence, which is 1-dim list or
        numpy array of strings, of size `N`.
        For example,
      ```
      train_cluster_id =
        ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0']
      ```
        'iaaa_0' means the entry belongs to speaker #0 in utterance 'iaaa'.

        Note that the order of entries within an utterance are preserved,
        and all utterances are simply concatenated together.
      args: Training configurations. See `arguments.py` for details.

    Raises:
      TypeError: If train_sequence or train_cluster_id is of wrong type.
      ValueError: If train_sequence or train_cluster_id has wrong dimension.
    """
        # check type
        if (not isinstance(train_sequence, np.ndarray)
                or train_sequence.dtype != float):
            raise TypeError(
                'train_sequence should be a numpy array of float type.')
        if isinstance(train_cluster_id, list):
            train_cluster_id = np.array(train_cluster_id)
        if (not isinstance(train_cluster_id, np.ndarray)
                or not train_cluster_id.dtype.name.startswith(
                    ('str', 'unicode'))):
            raise TypeError(
                'train_cluster_id should be a numpy array of strings.')
        # check dimension
        if train_sequence.ndim != 2:
            raise ValueError('train_sequence must be 2-dim array.')
        if train_cluster_id.ndim != 1:
            raise ValueError('train_cluster_id must be 1-dim array.')
        # check length and size
        train_total_length, observation_dim = train_sequence.shape
        if observation_dim != self.observation_dim:
            raise ValueError(
                'train_sequence does not match the dimension specified '
                'by args.observation_dim.')
        if train_total_length != len(train_cluster_id):
            raise ValueError('train_sequence length is not equal to '
                             'train_cluster_id length.')

        self.rnn_model.train()
        optimizer = self._get_optimizer(optimizer=args.optimizer,
                                        learning_rate=args.learning_rate)

        (sub_sequences, seq_lengths, transition_bias,
         transition_bias_denominator) = utils.resize_sequence(
             sequence=train_sequence,
             cluster_id=train_cluster_id,
             num_permutations=args.num_permutations)
        if self.estimate_transition_bias:
            if self.transition_bias is None:
                self.transition_bias = transition_bias
                self.transition_bias_denominator = transition_bias_denominator
            else:
                self.transition_bias = (
                    self.transition_bias * self.transition_bias_denominator +
                    transition_bias * transition_bias_denominator) / (
                        self.transition_bias_denominator +
                        transition_bias_denominator)
                self.transition_bias_denominator += transition_bias_denominator

        # For batch learning, pack the entire dataset.
        if args.batch_size is None:
            packed_train_sequence, rnn_truth = utils.pack_sequence(
                sub_sequences, seq_lengths, args.batch_size,
                self.observation_dim, self.device)
        train_loss = []
        for num_iter in range(args.train_iteration):
            optimizer.zero_grad()
            # For online learning, pack a subset in each iteration.
            if args.batch_size is not None:
                packed_train_sequence, rnn_truth = utils.pack_sequence(
                    sub_sequences, seq_lengths, args.batch_size,
                    self.observation_dim, self.device)
            hidden = self.rnn_init_hidden.repeat(1, args.batch_size, 1)
            mean, _ = self.rnn_model(packed_train_sequence, hidden)
            # use mean to predict
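            # Turn per-step outputs into prefix averages: after cumsum,
            # row t is divided by (t + 1), so mean[t] is the average of
            # the RNN outputs at steps 0..t.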
            mean = torch.cumsum(mean, dim=0)
            mean_size = mean.size()
            mean = torch.mm(
                torch.diag(
                    1.0 /
                    torch.arange(1, mean_size[0] + 1).float().to(self.device)),
                mean.view(mean_size[0], -1))
            mean = mean.view(mean_size)

            # Likelihood part.
            loss1 = loss_func.weighted_mse_loss(
                input_tensor=(rnn_truth != 0).float() * mean[:-1, :, :],
                target_tensor=rnn_truth,
                weight=1 / (2 * self.sigma2))

            # Sigma2 prior part.
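            # num_non_zero counts the non-padded residual entries per
            # dimension; sigma_alpha and sigma_beta are the prior
            # hyperparameters for sigma2.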
            weight = (((rnn_truth != 0).float() * mean[:-1, :, :] -
                       rnn_truth)**2).view(-1, observation_dim)
            num_non_zero = torch.sum((weight != 0).float(), dim=0).squeeze()
            loss2 = loss_func.sigma2_prior_loss(num_non_zero, args.sigma_alpha,
                                                args.sigma_beta, self.sigma2)

            # Regularization part.
            loss3 = loss_func.regularization_loss(self.rnn_model.parameters(),
                                                  args.regularization_weight)

            loss = loss1 + loss2 + loss3
            loss.backward()
            nn.utils.clip_grad_norm_(self.rnn_model.parameters(),
                                     args.grad_max_norm)
            optimizer.step()
            # avoid numerical issues
            self.sigma2.data.clamp_(min=1e-6)

            if (np.remainder(num_iter, 10) == 0
                    or num_iter == args.train_iteration - 1):
                self.logger.print(
                    2, 'Iter: {:d}  \t'
                    'Training Loss: {:.4f}    \n'
                    '    Negative Log Likelihood: {:.4f}\t'
                    'Sigma2 Prior: {:.4f}\t'
                    'Regularization: {:.4f}'.format(num_iter, float(loss.data),
                                                    float(loss1.data),
                                                    float(loss2.data),
                                                    float(loss3.data)))
            train_loss.append(float(
                loss1.data))  # only save the likelihood part
        self.logger.print(
            1, 'Done training with {} iterations'.format(args.train_iteration))
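
A hedged usage sketch for `fit_concatenated` follows. It assumes the package-level entry points documented in the upstream uis-rnn README (`uisrnn.parse_arguments()` returning `(model_args, training_args, inference_args)`, and the `UISRNN` constructor); treat those names as assumptions if this fork diverges. The toy data matches the docstring example above and is purely illustrative.

```python
import numpy as np
import uisrnn  # assumes the uis-rnn package is installed

# parse_arguments() and UISRNN follow the upstream uis-rnn README.
model_args, training_args, _ = uisrnn.parse_arguments()
model_args.observation_dim = 4
training_args.batch_size = 5        # attribute read by fit_concatenated
training_args.train_iteration = 50  # keep the toy run short

model = uisrnn.UISRNN(model_args)

# Toy concatenated data matching the docstring example: N=5, D=4.
train_sequence = np.random.randn(5, 4)
train_cluster_id = np.array(
    ['iaaa_0', 'iaaa_1', 'iaaa_0', 'ibbb_0', 'ibbb_0'])

model.fit_concatenated(train_sequence, train_cluster_id, training_args)
```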