def train(input_variable, target_variable, encoder, decoder, teacher_forcing_ratio,
          encoder_optimizer, decoder_optimizer, criterion, max_length, ctx):
    # F is mxnet.ndarray; SOS_token and EOS_token are module-level vocabulary constants.
    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)

        encoder_hidden = encoder.initHidden(ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        encoder_outputs, encoder_hidden = encoder(
            input_variable.expand_dims(0), encoder_hidden)

        # Zero-pad the flattened encoder outputs along the time axis up to max_length.
        if input_length < max_length:
            encoder_outputs = F.concat(
                encoder_outputs.flatten(),
                F.zeros((max_length - input_length, encoder.hidden_size), ctx=ctx),
                dim=0)
        else:
            encoder_outputs = encoder_outputs.flatten()

        decoder_input = F.array([SOS_token], ctx=ctx)
        decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < teacher_forcing_ratio

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                print(criterion(decoder_output, target_variable[di]))
                decoder_input = target_variable[di]  # Teacher forcing
        else:
            # Without teacher forcing: use the decoder's own predictions as the next input.
            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.argmax(axis=1)
                decoder_input = F.array([topi.asscalar()], ctx=ctx)
                loss = F.add(loss, criterion(decoder_output, target_variable[di]))
                if topi.asscalar() == EOS_token:
                    break

    loss.backward()

    encoder_optimizer.step(1)
    decoder_optimizer.step(1)

    return loss.asscalar() / target_length
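# Toy illustration of the zero-padding step in the train() function above,
# assuming F is mxnet.ndarray and the encoder outputs have layout
# (input_length, 1, hidden_size); all shapes here are illustrative.
import mxnet as mx
from mxnet import nd as F

ctx = mx.cpu()
max_length, hidden_size, input_length = 10, 8, 6
encoder_outputs = F.random.uniform(shape=(input_length, 1, hidden_size), ctx=ctx)
padded = F.concat(encoder_outputs.flatten(),
                  F.zeros((max_length - input_length, hidden_size), ctx=ctx),
                  dim=0)
print(padded.shape)   # (10, 8): one row per time step, zero rows for the padding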
import numpy as np
from mxnet import nd

def batched_l2_dist(a, b):
    # Pairwise L2 distances between two batches of vectors:
    # a has shape (batch, n, dim), b has shape (batch, m, dim); the result is (batch, n, m).
    a_squared = nd.power(nd.norm(a, axis=-1), 2)
    b_squared = nd.power(nd.norm(b, axis=-1), 2)

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, computed with a single batched gemm:
    # linalg_gemm(a, b^T, C, alpha=-2) returns -2 * a @ b^T + C, where C broadcasts ||b||^2.
    squared_res = nd.add(
        nd.linalg_gemm(
            a,
            nd.transpose(b, axes=(0, 2, 1)),
            nd.broadcast_axes(nd.expand_dims(b_squared, axis=-2), axis=1, size=a.shape[1]),
            alpha=-2),
        nd.expand_dims(a_squared, axis=-1))
    # Clip to avoid taking the square root of small negative values caused by
    # floating-point error.
    res = nd.sqrt(nd.clip(squared_res, 1e-30, np.finfo(np.float32).max))
    return res
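# Minimal usage sketch for batched_l2_dist, relying on the imports above;
# shapes and values are illustrative. Each batch entry holds 4 query vectors
# and 5 candidate vectors, so the result is their 4 x 5 distance matrix.
a = nd.random.uniform(shape=(2, 4, 8))
b = nd.random.uniform(shape=(2, 5, 8))
dist = batched_l2_dist(a, b)
print(dist.shape)   # (2, 4, 5)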
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    # Gradient with respect to the prediction: grad = -(ll / (pred + ll))**2,
    # built up step by step with elementwise nd ops.
    pred = in_data[0]
    ll = in_data[1]
    out = nd.add(pred, ll)
    out = nd.divide(ll, out)
    # out = (ll / (pred + ll))**2, negated below
    out = -nd.multiply(out, out)
    self.assign(in_grad[0], req[0], out)
def forward(self, inputs):
    # inputs has shape (batch_size, 3): three character indices per sample.
    # F refers to mxnet.ndarray in the original code.
    emd = self.encoder(inputs)
    # print(emd.shape)
    # The embedded input has shape (batch_size, 3, embed_size), so we extract
    # the 0th, 1st and 2nd character embedding from each batch.
    character1 = emd[:, 0, :]
    character2 = emd[:, 1, :]
    character3 = emd[:, 2, :]

    c1_hidden = self.dense1(character1)  # green arrow in diagram for character 1
    c2_hidden = self.dense1(character2)  # green arrow in diagram for character 2
    c3_hidden = self.dense1(character3)  # green arrow in diagram for character 3

    c1_hidden_2 = self.dense2(c1_hidden)  # yellow arrow in diagram
    addition_result = F.add(c2_hidden, c1_hidden_2)  # total of c1 + c2

    addition_hidden = self.dense2(addition_result)  # the yellow arrow
    addition_result_2 = F.add(addition_hidden, c3_hidden)  # total of (c1 + c2) + c3

    final_output = self.dense3(addition_result_2)
    return final_output
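# Quick illustration of the slicing used in the forward pass above, assuming
# the encoder is an embedding layer: a batch of shape (batch_size, 3, embed_size)
# is split into three per-character embeddings of shape (batch_size, embed_size).
# All shapes here are illustrative.
from mxnet import nd

emd = nd.random.uniform(shape=(4, 3, 32))   # (batch_size, 3 characters, embed_size)
character1 = emd[:, 0, :]
character2 = emd[:, 1, :]
character3 = emd[:, 2, :]
print(character1.shape, character2.shape, character3.shape)   # (4, 32) each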
def calculate_l1(params: dict) -> float:
    """Calculate the L1 norm over the weight parameters of the passed model."""
    # `params` is expected to behave like a gluon ParameterDict
    # (e.g. net.collect_params()); `add` refers to mxnet.ndarray.add.
    parameter = params
    l1 = None
    for key in parameter:
        if 'weight' in key:
            if l1 is None:
                l1 = parameter[key].data().abs().sum()
            else:
                l1 = add(l1, parameter[key].data().abs().sum())
    return l1
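# Minimal usage sketch for calculate_l1 on a hypothetical two-layer Gluon
# network; the architecture and shapes are illustrative, and `add` is assumed
# to be mxnet.ndarray.add as noted above.
import mxnet as mx
from mxnet import nd, gluon
from mxnet.ndarray import add

net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(16, activation='relu'))
net.add(gluon.nn.Dense(1))
net.initialize(mx.init.Xavier())
net(nd.random.uniform(shape=(4, 8)))   # forward pass materialises the deferred shapes

l1_penalty = calculate_l1(net.collect_params())
print(l1_penalty.asscalar())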
def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
    if self.inference_update_stat:
        # Compute per-channel batch statistics over the (N, H, W) axes.
        mean = x.mean(axis=(0, 2, 3))
        mean_expanded = F.expand_dims(
            F.expand_dims(F.expand_dims(mean, axis=0), axis=2), axis=3)
        var = F.square(F.broadcast_minus(x, mean_expanded)).mean(axis=(0, 2, 3))

        # Exponential-moving-average update of the running statistics,
        # assuming momentum_rest holds (1 - momentum).
        running_mean = F.add(
            F.multiply(self.running_mean.data(), self.momentum.as_in_context(x.context)),
            F.multiply(mean, self.momentum_rest.as_in_context(x.context)))
        running_var = F.add(
            F.multiply(self.running_var.data(), self.momentum.as_in_context(x.context)),
            F.multiply(var, self.momentum_rest.as_in_context(x.context)))
        self.running_mean.set_data(running_mean)
        self.running_var.set_data(running_var)

        # Normalise with the freshly computed batch statistics.
        return F.BatchNorm(x, gamma, beta, mean, var, name='fwd', **self._kwargs)
    else:
        # Standard inference path: normalise with the stored running statistics.
        return F.BatchNorm(x, gamma, beta, running_mean, running_var, name='fwd', **self._kwargs)
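# Toy illustration of the running-statistics update above, assuming
# momentum_rest = 1 - momentum as the name suggests; the values are made up.
from mxnet import nd

momentum, momentum_rest = nd.array([0.9]), nd.array([0.1])
running_mean = nd.array([0.0, 1.0])
batch_mean = nd.array([0.5, 0.5])
running_mean = momentum * running_mean + momentum_rest * batch_mean
print(running_mean.asnumpy())   # [0.05 0.95]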
def train(input_variable,  # single sequence
          target_variable,
          classifier,
          # decoder,
          classifier_optimizer,
          # decoder_optimizer,
          criterion,
          max_length,
          ctx):
    with autograd.record():
        loss = F.zeros((1,), ctx=ctx)

        input_length = input_variable.shape[0]
        target_length = target_variable.shape[0]

        classifier_hidden = classifier.init_hidden(ctx)
        classifier_outputs, classifier_hidden = classifier(
            input_variable.expand_dims(0), classifier_hidden)

        # decoder_input = F.array([SOS_token], ctx=ctx)  # NOTE: issue here
        # decoder_hidden = encoder_hidden
        # decoder_outputs, decoder_hidden = decoder(
        #     target_variable.expand_dims(0), decoder_hidden)
        # decoder_outputs = decoder(encoder_hidden)

        # print('enc_out.shape:', classifier_outputs.shape)
        # print('enc_hidden:', classifier_hidden)

        for di in range(target_length):
            # loss = F.add(loss,
            #              criterion(decoder_outputs[di], target_variable[di]))
            loss = F.add(loss,
                         criterion(classifier_outputs[di], target_variable[di]))
            # print(criterion(decoder_outputs[di], target_variable[di]))

    loss.backward()

    classifier_optimizer.step(1)
    # decoder_optimizer.step(1)

    return loss.asscalar() / target_length
def forward(self, output1, output2, label):
    # Per-sample Euclidean distance between the two embeddings; output1 and
    # output2 are assumed to have shape (batch_size, embedding_dim).
    euclidean_distance = nd.sqrt(
        nd.sum(nd.power(nd.subtract(output1, output2), 2), axis=1))
    # Contrastive loss: similar pairs (label 0) are pulled together, while
    # dissimilar pairs (label 1) are penalised only while closer than the margin.
    loss_contrastive = nd.mean(
        nd.add(nd.subtract(1, label) * nd.power(euclidean_distance, 2),
               label * nd.power(nd.maximum(nd.subtract(self.margin, euclidean_distance), 0), 2)))
    return loss_contrastive
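# Worked toy example of the contrastive-loss terms above; the margin, the
# embeddings and the labels are made up, and the arithmetic mirrors the
# forward pass rather than going through the containing loss Block.
from mxnet import nd

margin = 2.0
output1 = nd.array([[1.0, 0.0], [0.0, 1.0]])
output2 = nd.array([[1.0, 0.5], [1.0, 1.0]])
label = nd.array([0, 1])                                   # 0: similar pair, 1: dissimilar pair

d = nd.sqrt(nd.sum(nd.square(output1 - output2), axis=1))  # per-pair distances: [0.5, 1.0]
loss = nd.mean((1 - label) * nd.square(d)
               + label * nd.square(nd.maximum(margin - d, 0)))
print(loss.asscalar())                                     # (0.25 + 1.0) / 2 = 0.625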