def evaluate_edit_distance(data_loader: AsyncDataLoader, model):
    '''
    Measures the mean (over instances) of the characterwise edit distance (Levenshtein distance) between predicted and true names
    '''
    logged_example = False
    with data_loader as data_loader:
        cum_edit_distance = 0
        for split_batch, batch_length in tqdm(data_loader,
                                              total=data_loader.total_batches):
            batches_outputs = [(batch, model(batch.data))
                               for batch in split_batch]
            for batch, output in batches_outputs:
                predictions_labels = model.unbatchify(batch, output)
                for prediction, label in predictions_labels:
                    if not logged_example:
                        logger.info('Some example predictions:\n{}'.format(
                            pprint.pformat(predictions_labels[:10])))
                        logged_example = True
                    pred_name = ''.join(prediction)
                    real_name = ''.join(label)
                    cum_edit_distance += editdistance.eval(
                        pred_name, real_name)
    return cum_edit_distance / len(data_loader)
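For reference, a quick sanity check of the editdistance API this example relies on (editdistance.eval returns the Levenshtein distance between two strings):

import editdistance

# two substitutions turn 'banana' into 'bahama'
assert editdistance.eval('banana', 'bahama') == 2
# identical strings have distance 0
assert editdistance.eval('foo', 'foo') == 0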

def evaluate_f1(model, dataset, ctx, n_batch):
    # NOTE: this function's original header was lost; the name and signature
    # below are inferred from the body, which computes a weighted F1 score
    # over predicted vs. true subtokens
    pred = []
    true = []
    for i in tqdm(range(0, math.ceil(len(dataset) / n_batch))):
        data = dataset[n_batch * i:n_batch * (i + 1)]
        graph, label = model.batchify(data, ctx)
        output = model(graph)
        predictions = nd.argmax(output, axis=2)
        # Masking output to max(length_of_output, length_of_label)
        output_preds = predictions.asnumpy()
        output_lengths = []
        for row in output_preds:
            end_token_idxs = np.where(row == 0)[0]
            if len(end_token_idxs):
                output_lengths.append(int(min(end_token_idxs)))
            else:
                output_lengths.append(model.max_name_length)
        output_lengths = nd.array(output_lengths, ctx=ctx)
        mask_lengths = nd.maximum(output_lengths, label.value_lengths)

        output = nd.SequenceMask(predictions,
                                 value=-1,
                                 use_sequence_length=True,
                                 sequence_length=mask_lengths,
                                 axis=1).asnumpy().astype('int32')
        labels = nd.SequenceMask(label.values,
                                 value=-1,
                                 use_sequence_length=True,
                                 sequence_length=mask_lengths.astype('int32'),
                                 axis=1).asnumpy()

        pred += [i for i in output.flatten().tolist() if i >= 0]
        true += [i for i in labels.flatten().tolist() if i >= 0]
    return metrics.f1_score(true, pred, average='weighted')
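A minimal sketch of the masking step above on toy data, assuming token 0 is the end-of-name marker:

import numpy as np
from mxnet import nd

preds = nd.array([[4, 2, 0, 7],
                  [5, 1, 3, 2]])          # token 0 ends the first row early
label_lengths = nd.array([3, 2])
out_lengths = []
for row in preds.asnumpy():
    ends = np.where(row == 0)[0]
    out_lengths.append(int(ends.min()) if len(ends) else preds.shape[1])
mask_lengths = nd.maximum(nd.array(out_lengths), label_lengths)
masked = nd.SequenceMask(preds, value=-1, use_sequence_length=True,
                         sequence_length=mask_lengths, axis=1)
print(masked)  # positions past max(pred_len, label_len) become -1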
Example #2
def RMSE_many_to_many(predictions, labels, data_lengths):
    # squared error per timestep; labels gains a trailing unit axis to match predictions
    loss = (labels.expand_dims(axis=2) - predictions).square()
    # zero out timesteps beyond each example's valid length
    loss = nd.SequenceMask(loss, data_lengths, use_sequence_length=True, axis=1)
    # inverse-label weighting de-emphasizes large targets
    weight = 1 / (labels + 1)
    loss_no_weight = loss.sum(axis=1).squeeze() / data_lengths.astype('float32')
    loss_weighted = (loss.squeeze() * weight).sum(axis=1).squeeze() / data_lengths.astype('float32')
    return loss_weighted, loss_no_weight
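A toy call showing the expected shapes (note the function computes a masked MSE despite its name; predictions carry a trailing unit axis):

from mxnet import nd

labels = nd.array([[1., 2., 0.],
                   [3., 0., 0.]])              # (batch=2, length=3)
predictions = nd.array([[[1.5], [2.], [9.]],
                        [[3.], [9.], [9.]]])   # (batch, length, 1)
data_lengths = nd.array([2, 1])                # valid timesteps per example
weighted, unweighted = RMSE_many_to_many(predictions, labels, data_lengths)
print(unweighted)  # [0.125, 0.0]: only the first 2 / 1 timesteps count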
Example #3
    def forward(self, pred, label, valid_length):  # pylint: disable=arguments-differ
        """

        Parameters
        ----------
        F
        pred : Symbol or NDArray
            Shape (batch_size, length, V)
        label : Symbol or NDArray
            Shape (batch_size, length)
        valid_length : Symbol or NDArray
            Shape (batch_size, )
        Returns
        -------
        loss : Symbol or NDArray
            Shape (batch_size,)
        """
        if self._sparse_label:
            sample_weight = nd.cast(nd.expand_dims(nd.ones_like(label), axis=-1), dtype=np.float32)
        else:
            sample_weight = nd.ones_like(label)
        sample_weight = nd.SequenceMask(sample_weight,
                                        sequence_length=valid_length,
                                        use_sequence_length=True,
                                        axis=1)
        return super(SoftmaxCEMaskedLoss, self).forward(pred, label, sample_weight)
Example #4
    def hybrid_forward(self, F, output, *args, **kwargs):
        '''
        Returns the Softmax Cross Entropy loss of a model with a graph vocab, in the style of a sentinel pointer network
        Note: Unlike VarNamingLoss, this Loss DOES expect the last dimension of output to be probabilities summing to 1
        '''
        (label, _), data_encoder = args
        joint_label, label_lengths = label.values, label.value_lengths
        # We're using pick and not just sparse labels for XEnt b/c there can be multiple ways to point to the correct subtoken
        loss = nd.pick(output, joint_label, axis=2)

        # Masking outputs to max(length_of_output (based on emitting value 0), length_of_label)
        output_preds = nd.argmax(output, axis=2).asnumpy()
        output_lengths = []
        for row in output_preds:
            end_token_idxs = np.where(row == 0)[0]
            if len(end_token_idxs):
                output_lengths.append(int(min(end_token_idxs)) + 1)
            else:
                output_lengths.append(output.shape[1])
        output_lengths = nd.array(output_lengths, ctx=output.context)
        mask_lengths = nd.maximum(output_lengths, label_lengths)
        # padded positions receive probability 1.0, so they contribute -log(1.0) = 0
        loss = nd.SequenceMask(loss,
                               value=1.0,
                               use_sequence_length=True,
                               sequence_length=mask_lengths,
                               axis=1)

        return nd.mean(-nd.log(loss), axis=0, exclude=True)
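A small illustration of the nd.pick call above, which gathers the probability assigned to each labeled position:

from mxnet import nd

probs = nd.array([[[0.7, 0.2, 0.1],
                   [0.1, 0.8, 0.1]]])   # (batch=1, length=2, V=3)
labels = nd.array([[0, 1]])             # correct class per position
print(nd.pick(probs, labels, axis=2))   # [[0.7, 0.8]]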
Example #5
def check_sequence_mask():
    # SequenceMask input layout: [max_sequence_length, batch_size]
    # test with batch_size = 2
    a = nd.arange(0, LARGE_X * 2).reshape(LARGE_X, 2)
    # acts as the identity operator when use_sequence_length is left False
    b = nd.SequenceMask(a)
    assert b[-1][0] == a[-1][0]
    assert b.shape == a.shape
    # test with the default mask value (0)
    b = nd.SequenceMask(a, sequence_length=nd.array([1, 1]),
                        use_sequence_length=True)
    assert b[0][1] == a[0][1]  # first timestep of each batch element kept
    assert b[-1][-1] != a[-1][-1]  # later timesteps masked
    assert b[-1][-1] == 0
    # test with an explicit mask value
    b = nd.SequenceMask(a, sequence_length=nd.array([1, 1]),
                        use_sequence_length=True, value=-1)
    assert b[-1][-1] == -1
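The same checks shrunk to run standalone, with the large-tensor constant LARGE_X replaced by a small hypothetical stand-in:

from mxnet import nd

X = 100  # small stand-in for LARGE_X
a = nd.arange(0, X * 2).reshape(X, 2)
b = nd.SequenceMask(a)  # identity when use_sequence_length is False
assert (b == a).sum().asscalar() == a.size
b = nd.SequenceMask(a, sequence_length=nd.array([1, 1]),
                    use_sequence_length=True)
assert b[0][1] == a[0][1] and b[-1][-1] == 0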
Example #6
def masked_softmax(X, valid_length):
    # softmax over the last axis, ignoring positions beyond valid_length
    if valid_length is None:
        return X.softmax()
    else:
        shape = X.shape
        if valid_length.ndim == 1:
            # one length per batch example: repeat it for every query row
            valid_length = valid_length.repeat(shape[1], axis=0)
        else:
            valid_length = valid_length.reshape((-1,))
        # fill masked positions with a large negative value so softmax sends them to ~0
        X = nd.SequenceMask(X.reshape((-1, shape[-1])), valid_length, True,
                            axis=1, value=-1e6)
        return X.softmax().reshape(shape)
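Example call, assuming one valid length per batch example:

from mxnet import nd

X = nd.random.uniform(shape=(2, 2, 4))   # (batch, queries, keys)
valid_length = nd.array([2, 3])          # attendable keys per example
P = masked_softmax(X, valid_length)
print(P[0].sum(axis=-1))                 # rows still sum to 1
print(P[0])                              # columns 2-3 of example 0 are ~0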
Example #7
    def forward(self, x, length, hidden=None):
        # feed forward; outputs: [batch, seq_length, 2*num_hiddens]
        if hidden is None:
            outputs = self.rnn(x)
        else:
            outputs, state = self.rnn(x, hidden)
        # SequenceMask expects the sequence axis first, so transpose around the call
        outputs = nd.transpose(outputs, (1, 0, 2))
        outputs = nd.SequenceMask(outputs,
                                  sequence_length=length,
                                  use_sequence_length=True,
                                  value=0)
        outputs = nd.transpose(outputs, (1, 0, 2))
        # gather the last valid timestep of each example as its hidden summary
        hidden = [output[int(i) - 1] for output, i in zip(outputs, length)]
        hidden = nd.stack(*hidden).squeeze()
        return outputs, hidden
Example #8
File: train.py  Project: roec/d2l-en
    def forward(self, pred, label, valid_length):
        weights = nd.ones_like(label).expand_dims(axis=-1)
        weights = nd.SequenceMask(weights, valid_length, True, axis=1)
        return super(MaskedSoftmaxCELoss, self).forward(pred, label, weights)
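A self-contained usage sketch, assuming the class extends gluon.loss.SoftmaxCELoss as in d2l-en:

from mxnet import nd
from mxnet.gluon import loss as gloss

class MaskedSoftmaxCELoss(gloss.SoftmaxCELoss):
    def forward(self, pred, label, valid_length):
        weights = nd.ones_like(label).expand_dims(axis=-1)
        weights = nd.SequenceMask(weights, valid_length, True, axis=1)
        return super(MaskedSoftmaxCELoss, self).forward(pred, label, weights)

loss = MaskedSoftmaxCELoss()
pred = nd.random.uniform(shape=(2, 4, 10))  # (batch, length, vocab)
label = nd.zeros((2, 4))
valid_len = nd.array([3, 1])
print(loss(pred, label, valid_len))         # per-example masked CE, shape (2,)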
Example #9
    def calulation(self, input_str, ko_dict, en_dict, en_rev_dict, ctx):
        """
        Inference code
        """
        # add START and END tokens around the input
        input_str = [
            [
                'START',
            ] + mecab.morphs(input_str.strip()) + [
                'END',
            ],
        ]
        X = encoding_and_padding(input_str,
                                 ko_dict,
                                 max_seq=self.max_seq_length)
        #string to embed
        inputs = F.array(X, ctx=ctx)

        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        #encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
        next_h = F.random.normal(0, 1, (1, self.n_hidden), ctx=ctx)
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            if j == 0:
                enouts = enout
                next_hs = next_h
            else:
                enouts = F.concat(enouts, enout, dim=1)
                next_hs = F.concat(next_hs, next_h, dim=1)
        #masking with 0 using length
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # take only supports axis 0, so flatten (N, 30, 300) -> (N * 30, 300)
        # and index rows directly; N = (0, 1, 2, 3, 4, 5, ...)
        next_hs = next_hs.take(in_sent_last_idx)

        # embed 'START' as the decoder's initial input
        Y_init = F.array([
            [
                en_dict['START'],
            ],
        ], ctx=ctx)
        Y_init = F.cast(self.embedding(Y_init), dtype='float32')
        deout = Y_init[:, 0, :]

        # initialize accumulators so they exist even if the loop breaks early
        ret_seq = []
        att_weights = None
        # iterate up to the output sequence length
        for i in range(self.out_seq_len):
            if self.attention:
                deout, att_weight = self.apply_attention(
                    F=F, inputs=deout, hidden=next_hs, encoder_outputs=enouts)
                if i == 0:
                    att_weights = att_weight
                else:
                    att_weights = F.concat(att_weights, att_weight, dim=0)
            deout, (next_hs, ) = self.decoder(deout, [
                next_hs,
            ])
            # expand dims to apply batchnorm, then reduce back
            deout = F.expand_dims(deout, axis=1)
            deout = self.batchnorm(deout)
            #reduce dim
            deout = deout[:, 0, :]
            # produce the output for the step after 'START'
            deout_sm = self.dense(deout)
            deout = F.one_hot(F.argmax(F.softmax(deout_sm, axis=1), axis=1),
                              depth=self.vocab_size)
            # convert into decoder-input form (apply embedding, match dims)
            deout = F.argmax(deout, axis=1)
            deout = F.expand_dims(deout, axis=0)
            deout = F.cast(self.embedding(deout)[:, 0, :], dtype='float32')
            gen_char = en_rev_dict[F.argmax(deout_sm,
                                            axis=1).asnumpy()[0].astype('int')]
            if gen_char in ('__PAD__', 'END'):
                break
            ret_seq.append(gen_char)
        return (" ".join(ret_seq), att_weights)
Example #10
    def hybrid_forward(self, F, inputs, outputs, initial_hidden_state,
                       batch_size_seq):
        # sentence end: END tag index == 2
        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        outputs = F.cast(outputs, dtype='float32')
        out_sent_last_idx = F.argmax(F.where(outputs == self.end_idx,
                                             F.ones_like(outputs),
                                             F.zeros_like(outputs)),
                                     axis=1)
        #encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')

        next_h = initial_hidden_state
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            if j == 0:
                enouts = enout
                next_hs = next_h
            else:
                enouts = F.concat(enouts, enout, dim=1)
                next_hs = F.concat(next_hs, next_h, dim=1)
        #masking with 0 using length
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # take only supports axis 0, so flatten (N, 30, 300) -> (N * 30, 300)
        # and index rows directly; N = (0, 1, 2, 3, 4, 5, ...)
        next_hs = next_hs.take(in_sent_last_idx +
                               (batch_size_seq * self.max_seq_length))
        embeddinged_out = F.cast(self.embedding(outputs), dtype='float32')

        #decoder GRU with attention
        for i in range(self.out_seq_len):
            # unroll the GRUCell out_seq_len times, accumulating the outputs
            p_outputs = F.slice_axis(embeddinged_out,
                                     axis=1,
                                     begin=i,
                                     end=i + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            # p_outputs = outputs[:, i, :]
            # slice_axis is used instead of the line above to keep this hybridizable
            if self.attention:
                p_outputs, _ = self.apply_attention(F=F,
                                                    inputs=p_outputs,
                                                    hidden=next_hs,
                                                    encoder_outputs=enouts)
            deout, (next_hs, ) = self.decoder(p_outputs, [
                next_hs,
            ])
            if i == 0:
                deouts = deout
            else:
                deouts = F.concat(deouts, deout, dim=1)
        # reshape 2-dim -> 3-dim
        deouts = F.reshape(deouts, (-1, self.out_seq_len, self.n_hidden))
        #0 padding
        deouts = F.transpose(deouts, (1, 0, 2))
        deouts = F.SequenceMask(deouts,
                                sequence_length=out_sent_last_idx + 1,
                                use_sequence_length=True)
        deouts = F.transpose(deouts, (1, 0, 2))

        deouts = self.batchnorm(deouts)
        deouts_fc = self.dense(deouts)
        return deouts_fc
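The take-based gather used above, in isolation: flatten (N, T, H) to (N*T, H), then index row n*T + t (batch_size_seq is assumed to be arange(N)):

from mxnet import nd

N, T, H = 2, 3, 4
states = nd.arange(N * T * H).reshape((N, T, H))
last_idx = nd.array([1, 2])       # last valid timestep per example
batch_offset = nd.arange(N) * T   # row offset of each example once flattened
flat = states.reshape((-1, H))    # (N*T, H)
print(flat.take(last_idx + batch_offset))  # (N, H): states at (0,1) and (1,2)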
Example #11
    def __init__(self, num_inputs, num_hiddens, batch_first, drop_prob):
        super(LSTM, self).__init__()
        self.drop_prob = drop_prob
        self.batch_first = batch_first
        if batch_first:
            self.layout = 'NTC'
        else:
            self.layout = 'TNC'
        self.rnn = rnn.LSTM(num_hiddens,
                            layout=self.layout,
                            dropout=drop_prob,
                            bidirectional=True,
                            input_size=num_inputs,
                            i2h_weight_initializer='Orthogonal',
                            h2h_weight_initializer='Orthogonal')

    def forward(self, x, length, state=None):
        # state, if provided, is the (hidden, cell) pair for the LSTM
        # (tuple-unpacking parameters are invalid Python 3 syntax)
        if state is None:
            outputs = self.rnn(x)  # outputs: [batch, seq_length, 2*num_hiddens]
        else:
            outputs, state = self.rnn(x, state)
        outputs = nd.transpose(outputs, (1, 0, 2))
        outputs = nd.SequenceMask(outputs,
                                  sequence_length=length,
                                  use_sequence_length=True,
                                  value=0)
        outputs = nd.transpose(outputs, (1, 0, 2))
        # gather the last valid timestep of each example
        hidden = [output[int(i) - 1] for output, i in zip(outputs, length)]
        hidden = nd.stack(*hidden).squeeze()
        return outputs, hidden
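With the forward signature fixed to take an optional state pair, a hypothetical smoke test (assuming LSTM subclasses gluon.nn.Block and mxnet.gluon.rnn is imported):

from mxnet import nd

net = LSTM(num_inputs=8, num_hiddens=16, batch_first=True, drop_prob=0.0)
net.initialize()
x = nd.random.uniform(shape=(2, 5, 8))   # (batch, seq, features)
length = nd.array([3, 5])                # valid lengths per example
outputs, hidden = net(x, length)
print(outputs.shape, hidden.shape)       # (2, 5, 32), (2, 32)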