Example #1
def train(input_variable, target_variable, encoder, decoder, teacher_forcing_ratio,
          encoder_optimizer, decoder_optimizer, criterion, max_length, ctx):
    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)

        encoder_hidden = encoder.initHidden(ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        encoder_outputs, encoder_hidden = encoder(
                input_variable.expand_dims(0), encoder_hidden)

        if input_length < max_length:
            encoder_outputs = F.concat(encoder_outputs.flatten(),
                F.zeros((max_length - input_length, encoder.hidden_size), ctx=ctx), dim=0)
        else:
            encoder_outputs = encoder_outputs.flatten()

        decoder_input = F.array([SOS_token], ctx=ctx)

        decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < teacher_forcing_ratio

        if use_teacher_forcing:
            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)

                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                print(criterion(decoder_output, target_variable[di]))
                decoder_input = target_variable[di]  # Teacher forcing

        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.argmax(axis=1)

                decoder_input = F.array([topi.asscalar()], ctx=ctx)

                loss = F.add(loss, criterion(decoder_output, target_variable[di]))

                if topi.asscalar() == EOS_token:
                    break

        loss.backward()

    encoder_optimizer.step(1)
    decoder_optimizer.step(1)

    return loss.asscalar()/target_length
Example #2
def batched_l2_dist(a, b):
    # a: (batch, n_a, dim), b: (batch, n_b, dim)
    a_squared = nd.power(nd.norm(a, axis=-1), 2)   # (batch, n_a)
    b_squared = nd.power(nd.norm(b, axis=-1), 2)   # (batch, n_b)

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2:
    # gemm computes -2 * a @ b^T plus the broadcast b_squared, then a_squared is added
    squared_res = nd.add(nd.linalg_gemm(
        a, nd.transpose(b, axes=(0, 2, 1)),
        nd.broadcast_axes(nd.expand_dims(b_squared, axis=-2), axis=1, size=a.shape[1]),
        alpha=-2
    ), nd.expand_dims(a_squared, axis=-1))
    # clip to keep sqrt away from small negative values caused by rounding
    res = nd.sqrt(nd.clip(squared_res, 1e-30, np.finfo(np.float32).max))
    return res
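
A quick shape check for batched_l2_dist above (a minimal sketch; it assumes MXNet is installed, mxnet.ndarray is imported as nd in the function's module, and the array sizes are arbitrary):

from mxnet import nd

a = nd.random.normal(shape=(2, 4, 8))   # (batch, n_a, dim)
b = nd.random.normal(shape=(2, 5, 8))   # (batch, n_b, dim)
dist = batched_l2_dist(a, b)
print(dist.shape)                        # expected: (2, 4, 5) pairwise L2 distances
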
Example #3
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):

        pred = in_data[0]
        ll = in_data[1]

        # gradient with respect to pred: -(ll / (pred + ll))**2
        out = nd.add(pred, ll)
        out = nd.divide(ll, out)
        out = - nd.multiply(out, out)
        self.assign(in_grad[0], req[0], out)
Example #4
    def forward(self, inputs):
        emd = self.encoder(inputs)
        # print(emd.shape)
        # the input has shape (batch_size, 3 characters), so we extract the
        # 0th, 1st and 2nd character embedding from each batch element
        character1 = emd[:, 0, :]
        character2 = emd[:, 1, :]
        character3 = emd[:, 2, :]
        c1_hidden = self.dense1(
            character1)  # green arrow in diagram for character 1
        c2_hidden = self.dense1(
            character2)  # green arrow in diagram for character 2
        c3_hidden = self.dense1(
            character3)  # green arrow in diagram for character 3
        c1_hidden_2 = self.dense2(c1_hidden)  # yellow arrow in diagram
        addition_result = F.add(c2_hidden, c1_hidden_2)  # total of c1 + c2
        addition_hidden = self.dense2(addition_result)  # the yellow arrow
        addition_result_2 = F.add(addition_hidden, c3_hidden)  # total of (c1 + c2) + c3
        final_output = self.dense3(addition_result_2)
        return final_output
Example #5
def calculate_l1(params: dict) -> float:
    """
    Calculate the L1 norm of the weights of the passed model.
    """
    parameter = params
    l1 = None
    for key in parameter:
        if 'weight' in key:
            if l1 is None:
                l1 = parameter[key].data().abs().sum()
            else:
                l1 = l1 + parameter[key].data().abs().sum()
    return l1
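
A minimal usage sketch for calculate_l1 (assumptions: MXNet Gluon is available, the block is initialized, and its parameters are passed as a plain dict; the tiny Dense layer is only illustrative):

from mxnet import init
from mxnet.gluon import nn

net = nn.Dense(4, in_units=3)
net.initialize(init.Xavier())
l1_norm = calculate_l1(dict(net.collect_params().items()))
print(l1_norm.asscalar())   # scalar L1 norm over all 'weight' parameters
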
Example #6
    def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
        if self.inference_update_stat:
            mean = x.mean(axis=(0, 2, 3))
            mean_expanded = F.expand_dims(F.expand_dims(F.expand_dims(mean,
                                                                      axis=0),
                                                        axis=2),
                                          axis=3)
            var = F.square(F.broadcast_minus(x,
                                             mean_expanded)).mean(axis=(0, 2,
                                                                        3))

            running_mean = F.add(
                F.multiply(self.running_mean.data(),
                           self.momentum.as_in_context(x.context)),
                F.multiply(mean, self.momentum_rest.as_in_context(x.context)))
            running_var = F.add(
                F.multiply(self.running_var.data(),
                           self.momentum.as_in_context(x.context)),
                F.multiply(var, self.momentum_rest.as_in_context(x.context)))
            self.running_mean.set_data(running_mean)
            self.running_var.set_data(running_var)
            return F.BatchNorm(x,
                               gamma,
                               beta,
                               mean,
                               var,
                               name='fwd',
                               **self._kwargs)
        else:
            return F.BatchNorm(x,
                               gamma,
                               beta,
                               running_mean,
                               running_var,
                               name='fwd',
                               **self._kwargs)
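
The inference_update_stat branch above maintains exponential moving averages of the batch statistics; assuming momentum_rest equals 1 - momentum, the update has the form running = momentum * running + (1 - momentum) * batch_stat. A standalone sketch of that update with plain NDArrays (the names here are illustrative, not taken from the class):

from mxnet import nd

momentum = 0.9
running_mean = nd.zeros((3,))
batch_mean = nd.array([0.5, -0.2, 1.0])

# same form as the F.add(F.multiply(...), ...) calls above
running_mean = momentum * running_mean + (1 - momentum) * batch_mean
print(running_mean)   # [0.05 -0.02  0.1]
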
Example #7
def train(input_variable,  # single sequence
          target_variable,
          classifier,
          # decoder,
          classifier_optimizer,
          # decoder_optimizer,
          criterion, max_length, ctx):

    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        classifier_hidden = classifier.init_hidden(ctx)
        classifier_outputs, classifier_hidden = classifier(
            input_variable.expand_dims(0), classifier_hidden)

        #         decoder_input = F.array([SOS_token], ctx=ctx) # NOTE: issue here
        #         decoder_hidden = encoder_hidden

        #         decoder_outputs, decoder_hidden = decoder(
        #                 target_variable.expand_dims(0), decoder_hidden)
        # decoder_outputs = decoder(encoder_hidden)

        # print('enc_out.shape:', classifier_outputs.shape)
        # print('enc_hidden:', classifier_hidden)


        for di in range(target_length):
            # loss = F.add(loss,
            #              criterion(decoder_outputs[di], target_variable[di]))
            loss = F.add(loss,
                         criterion(classifier_outputs[di],
                                   target_variable[di]))
            # print(criterion(decoder_outputs[di], target_variable[di]))

        loss.backward()

    classifier_optimizer.step(1)
    # decoder_optimizer.step(1)

    return loss.asscalar() / target_length
Example #8
    def forward(self, output1, output2, label):
        euclidean_distance = nd.sqrt(nd.sum(nd.power(nd.subtract(output1, output2), 2)))
        loss_contrastive = nd.mean(
            nd.add(nd.subtract(1, label) * nd.power(euclidean_distance, 2),
                   label * nd.power(nd.subtract(self.margin, euclidean_distance), 2)))
        return loss_contrastive
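
For comparison, a standalone sketch of the usual contrastive-loss formulation, which computes the distance per pair (axis=1) and clamps margin - d at zero; this is an assumption about the intended formula, since the forward above sums over all elements and omits the clamp:

from mxnet import nd

def contrastive_loss(output1, output2, label, margin=2.0):
    # per-pair Euclidean distance along the feature axis
    d = nd.sqrt(nd.sum(nd.square(output1 - output2), axis=1))
    # label 0: similar pair, pull together; label 1: dissimilar pair, push apart up to the margin
    return nd.mean((1 - label) * nd.square(d) +
                   label * nd.square(nd.maximum(margin - d, 0)))
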
Example #9
def train(input_variable, target_variable, encoder, decoder,
          teacher_forcing_ratio, encoder_optimizer, decoder_optimizer,
          criterion, max_length, ctx):
    with autograd.record():
        loss = F.zeros((1, ), ctx=ctx)

        encoder_hidden = encoder.initHidden(ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        encoder_outputs, encoder_hidden = encoder(
            input_variable.expand_dims(0), encoder_hidden)

        if input_length < max_length:
            encoder_outputs = F.concat(
                encoder_outputs.flatten(),
                F.zeros((max_length - input_length, encoder.hidden_size),
                        ctx=ctx),
                dim=0)
        else:
            encoder_outputs = encoder_outputs.flatten()

        decoder_input = F.array([SOS_token], ctx=ctx)

        decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < teacher_forcing_ratio

        if use_teacher_forcing:
            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)

                loss = F.add(loss,
                             criterion(decoder_output, target_variable[di]))
                print(criterion(decoder_output, target_variable[di]))
                decoder_input = target_variable[di]  # Teacher forcing

        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.argmax(axis=1)

                decoder_input = F.array([topi.asscalar()], ctx=ctx)

                loss = F.add(loss,
                             criterion(decoder_output, target_variable[di]))

                if topi.asscalar() == EOS_token:
                    break

        loss.backward()

    encoder_optimizer.step(1)
    decoder_optimizer.step(1)

    return loss.asscalar() / target_length