def test_parameter_sharing_nested_scopes(self):
    # Test parameter sharing
    with scope.NameScope('global_scope'):
        with ParameterSharing({'model_b': 'model_a'}):
            param_global = parameter_sharing_context.get_parameter_name(
                'w')
            self.assertEquals(param_global, 'global_scope/w')
            # This scope is overridden to match 'model_a'
            with scope.NameScope('model_b'):
                with ParameterSharing({'shared_scope': ''}):
                    param_4 = parameter_sharing_context.get_parameter_name(
                        'w')
                    self.assertEquals(param_4, 'global_scope/model_a/w')
                    with scope.NameScope('shared_scope'):
                        param_5 = parameter_sharing_context.\
                            get_parameter_name('w')
                        self.assertEquals(param_5, 'global_scope/model_a/w')
            # This scope is not supposed to share parameters with anyone
            with scope.NameScope('model_c'):
                with ParameterSharing({'shared_scope': ''}):
                    param_4 = parameter_sharing_context.get_parameter_name(
                        'w')
                    self.assertEquals(param_4, 'global_scope/model_c/w')
                    with scope.NameScope('shared_scope'):
                        param_5 = parameter_sharing_context.\
                            get_parameter_name('w')
                        self.assertEquals(param_5, 'global_scope/model_c/w')
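# Hedged standalone sketch of the behavior asserted above, using the same
# caffe2.python APIs as the test: a sharing map declared in a scope rewrites
# only the sub-scopes it names, while unlisted siblings ('model_c') keep their
# own parameter names. The helper name below is illustrative only.
from caffe2.python import scope
from caffe2.python.modeling.parameter_sharing import (
    ParameterSharing,
    parameter_sharing_context,
)


def _nested_scope_sharing_demo():
    with scope.NameScope('global_scope'):
        with ParameterSharing({'model_b': 'model_a'}):
            with scope.NameScope('model_b'):
                # Rewritten: 'global_scope/model_a/w'
                shared = parameter_sharing_context.get_parameter_name('w')
            with scope.NameScope('model_c'):
                # Not in the map: 'global_scope/model_c/w'
                unshared = parameter_sharing_context.get_parameter_name('w')
    return shared, unshared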
def test_deep_hierarchy(self):
    model = model_helper.ModelHelper(name="test")
    with ParameterSharing({'a': 'b'}):
        with scope.NameScope('a'):
            with ParameterSharing({'c': 'd'}):
                with scope.NameScope('c'):
                    with ParameterSharing({'e': 'f'}):
                        with scope.NameScope('e'):
                            p = model.create_param(
                                'w',
                                shape=[2],
                                initializer=Initializer("ConstantFill"))
    self.assertNotEqual(model.get_param_info(p), None)
def test_parameter_sharing_brew(self):
    # Test no sharing default scopes
    model = model_helper.ModelHelper(name="test")
    data = model.net.AddExternalInput("data")
    fc1 = brew.fc(model, data, "fc1", dim_in=16, dim_out=16)
    # Shared params are expected to share the same shape and fail if it's
    # not true
    with self.assertRaises(AssertionError):
        _ = brew.fc(model, data, "fc1", dim_in=2, dim_out=2)  # noqa

    output_blobs = set()
    with scope.NameScope('some_global_scope'):
        with scope.NameScope('model_a'):
            output_blobs.add(str(brew.fc(model, fc1, 'output', 16, 16)))
        with ParameterSharing({'model_b': 'model_a'}),\
                scope.NameScope('model_b'):
            with ParameterSharing({'shared_1': '', 'shared_2': ''}):
                # All params in DenseLayers from shared_1, shared_2 and
                # model_a are shared and will be pointing to:
                # [some_global_scope/model_a/output_W,
                #  some_global_scope/model_a/output_b]
                with scope.NameScope('shared_1'):
                    output_blobs.add(
                        str(brew.fc(model, fc1, 'output', 16, 16)))
                with scope.NameScope('shared_2'):
                    output_blobs.add(
                        str(brew.fc(model, fc1, 'output', 16, 16)))
            # Params of this layer are not shared with anyone unless
            # there is some explicit sharing with model_a/unshared (not
            # in this example).
            # Names of the blobs are
            # [some_global_scope/model_a/unshared/output_W,
            #  some_global_scope/model_a/unshared/output_b]
            with scope.NameScope('unshared'):
                output_blobs.add(
                    str(brew.fc(model, fc1, 'output', 16, 16)))

    self.assertEqual(len(model._parameters_info), 6)
    self.assertEqual(len(output_blobs), 4)
    self.assertEqual(sorted(model._parameters_info.keys()), [
        'fc1_b',
        'fc1_w',
        'some_global_scope/model_a/output_b',
        'some_global_scope/model_a/output_w',
        'some_global_scope/model_a/unshared/output_b',
        'some_global_scope/model_a/unshared/output_w',
    ])
    model.Validate()
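# Hedged mini-example built on the same brew / ModelHelper APIs as the test
# above: two fc layers created under a shared scope read the same weight and
# bias blobs, so only one pair of parameter blobs is registered. The names
# used here ('sharing_demo', 'model_a', 'model_b') are illustrative only.
from caffe2.python import brew, model_helper, scope
from caffe2.python.modeling.parameter_sharing import ParameterSharing


def _brew_fc_sharing_demo():
    m = model_helper.ModelHelper(name="sharing_demo")
    data = m.net.AddExternalInput("data")
    with scope.NameScope('model_a'):
        out_a = brew.fc(m, data, 'output', dim_in=16, dim_out=16)
    with ParameterSharing({'model_b': 'model_a'}),\
            scope.NameScope('model_b'):
        out_b = brew.fc(m, data, 'output', dim_in=16, dim_out=16)
    # out_a and out_b are distinct output blobs, but both fc ops read
    # 'model_a/output_w' / 'model_a/output_b', so the model holds just two
    # parameter blobs in total.
    return m, out_a, out_b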
def test_layer_duplicated_parameter_init(self):
    output_dims = 2
    with scope.NameScope('global_scope'):
        with ParameterSharing({'new_fc': 'shared_fc'}):
            self.model.FC(
                self.model.input_feature_schema.float_features,
                output_dims,
                name='shared_fc'
            )
            self.model.FC(
                self.model.input_feature_schema.float_features,
                output_dims,
                name='new_fc'
            )

    train_init_net = core.Net('train_init_net')
    train_net = core.Net('train_net')
    for layer in self.model.layers:
        layer.add_operators(train_net, train_init_net)

    op_outputs = []
    for op in train_init_net._net.op:
        op_outputs.extend(op.output)

    # only fill these parameter blobs once
    self.assertEquals(
        sorted(op_outputs),
        ['global_scope/shared_fc/b', 'global_scope/shared_fc/w']
    )
def apply_over_sequence(
    self,
    model,
    inputs,
    seq_lengths,
    initial_states,
    outputs_with_grads=None,
):
    inputs = self.cell.prepare_input(model, inputs)

    # Now they are blob references - outputs of splitting the input sequence
    split_inputs = model.net.Split(
        inputs,
        [str(inputs) + "_timestep_{}".format(i) for i in range(self.T)],
        axis=0)
    if self.T == 1:
        split_inputs = [split_inputs]

    states = initial_states
    all_states = []
    for t in range(0, self.T):
        scope_name = "timestep_{}".format(t)
        # Parameters of all timesteps are shared
        with ParameterSharing({scope_name: ''}),\
                scope.NameScope(scope_name):
            timestep = model.param_init_net.ConstantFill(
                [], "timestep", value=t, shape=[1],
                dtype=core.DataType.INT32,
                device_option=core.DeviceOption(caffe2_pb2.CPU))
            states = self.cell._apply(
                model=model,
                input_t=split_inputs[t],
                seq_lengths=seq_lengths,
                states=states,
                timestep=timestep,
            )
        all_states.append(states)

    all_states = zip(*all_states)
    all_states = [
        model.net.Concat(
            list(full_output),
            [
                str(full_output[0])[len("timestep_0/"):] + "_concat",
                str(full_output[0])[len("timestep_0/"):] + "_concat_info"
            ],
            axis=0)[0]
        for full_output in all_states
    ]
    # Interleave the concatenated per-step outputs with the final states
    outputs = tuple(
        six.next(it) for it in
        itertools.cycle([iter(all_states), iter(states)]))
    outputs_without_grad = set(range(len(outputs))) - set(outputs_with_grads)
    for i in outputs_without_grad:
        model.net.ZeroGradient(outputs[i], [])
    logging.debug("Added 0 gradients for blobs: %s",
                  [outputs[i] for i in outputs_without_grad])

    return None, outputs
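# Hedged standalone sketch (the 'rnn' and 'gates_w' names are made up) of why
# every timestep above ends up reusing the same parameter blobs: each
# 'timestep_<t>' NameScope is remapped to '' by ParameterSharing, so a
# parameter requested inside any timestep resolves to the enclosing scope.
from caffe2.python import scope
from caffe2.python.modeling.parameter_sharing import (
    ParameterSharing,
    parameter_sharing_context,
)


def _timestep_sharing_demo():
    resolved = []
    with scope.NameScope('rnn'):
        for t in range(2):
            scope_name = "timestep_{}".format(t)
            with ParameterSharing({scope_name: ''}),\
                    scope.NameScope(scope_name):
                # Both iterations resolve to 'rnn/gates_w' rather than
                # 'rnn/timestep_0/gates_w' / 'rnn/timestep_1/gates_w'.
                resolved.append(
                    parameter_sharing_context.get_parameter_name('gates_w'))
    return resolved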
def test_layer_shared_parameter_name_within_same_namescope(self):
    output_dims = 2
    with scope.NameScope('global_scope'):
        with ParameterSharing({'fc_auto_0': 'fc'}):
            self.model.FC(self.model.input_feature_schema.float_features,
                          output_dims)
            self.assertEquals(self.model.layers[-1].w,
                              'global_scope/fc/w')

            self.model.FC(self.model.input_feature_schema.float_features,
                          output_dims)
            self.assertEquals(self.model.layers[-1].w,
                              'global_scope/fc/w')
def test_layer_shared_parameter_name_different_shapes(self):
    output_dims = 2
    with scope.NameScope('global_scope'):
        with ParameterSharing({'fc_auto_0': 'fc'}):
            self.model.FC(self.model.input_feature_schema.float_features,
                          output_dims)
            self.assertEquals(self.model.layers[-1].w,
                              'global_scope/fc/w')

            with six.assertRaisesRegex(self, ValueError,
                                       'Got inconsistent shapes .*'):
                self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims + 1)
def test_parameter_sharing_subscopes(self):
    # Sharing only one of the subscopes
    with ParameterSharing({'global_scope/b': 'global_scope/a'}):
        with scope.NameScope('global_scope'):
            param_6 = parameter_sharing_context.get_parameter_name('w')
            self.assertEquals(param_6, 'global_scope/w')
            with scope.NameScope('a'):
                param_7 = parameter_sharing_context.get_parameter_name('w')
                self.assertEquals(param_7, 'global_scope/a/w')
            with scope.NameScope('b'):
                param_8 = parameter_sharing_context.get_parameter_name('w')
                self.assertEquals(param_8, 'global_scope/a/w')
            with scope.NameScope('c'):
                param_9 = parameter_sharing_context.get_parameter_name('w')
                self.assertEquals(param_9, 'global_scope/c/w')
def test_layer_shared_parameter_name_within_same_namescope_customized_name(
        self):
    output_dims = 2
    with scope.NameScope('global_scope'):
        with ParameterSharing({'new_fc': 'shared_fc'}):
            self.model.FC(self.model.input_feature_schema.float_features,
                          output_dims,
                          name='shared_fc')
            self.assertEquals(self.model.layers[-1].w,
                              'global_scope/shared_fc/w')

            self.model.FC(self.model.input_feature_schema.float_features,
                          output_dims,
                          name='new_fc')
            self.assertEquals(self.model.layers[-1].w,
                              'global_scope/shared_fc/w')
def test_layer_shared_parameter_name_different_namescopes(self):
    output_dims = 2
    with scope.NameScope('global_scope'):
        with ParameterSharing({'scope_1': 'scope_0'}):
            with scope.NameScope('scope_0'):
                fc1_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims)
                self.assertEquals(self.model.layers[-1].w,
                                  'global_scope/scope_0/fc/w')
                self.assertEquals(fc1_output(),
                                  'global_scope/scope_0/fc/output')

            with scope.NameScope('scope_1'):
                fc2_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims)
                self.assertEquals(self.model.layers[-1].w,
                                  'global_scope/scope_0/fc/w')
                self.assertEquals(fc2_output(),
                                  'global_scope/scope_1/fc/output')
def test_layer_shared_parameter_optim_validator(self):
    """
    This test covers the _validate_param_optim function in the
    layer_model_helper class.
    """
    output_dims = 2

    adagrad_optim = AdagradOptimizer(
        alpha=0.004,
        epsilon=0.02,
    )
    self.model.default_optimizer = adagrad_optim

    # the following covers the branch -- optim is None
    with scope.NameScope('global_scope_0'):
        with ParameterSharing({'scope_1': 'scope_0'}):
            with scope.NameScope('scope_0'):
                fc1_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    weight_optim=self.model.NoOptim,
                )
            with scope.NameScope('scope_1'), self.assertRaises(Exception):
                fc2_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims)

    # the following covers the branch -- optim is NoOptim
    with scope.NameScope('global_scope_1'):
        with ParameterSharing({'scope_1': 'scope_0'}):
            with scope.NameScope('scope_0'):
                fc1_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    weight_optim=None,
                )
            with scope.NameScope('scope_1'), self.assertRaises(Exception):
                fc2_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    weight_optim=self.model.NoOptim,
                )

    # the following covers the branch -- optim is an instance of Optimizer
    adagrad_optim_2 = AdagradOptimizer(
        alpha=0.005,
        epsilon=0.02,
    )
    adam_optim = AdamOptimizer()
    self.model.default_optimizer = adagrad_optim_2

    with scope.NameScope('global_scope_2'):
        with ParameterSharing({
            'scope_1': 'scope_0',
            'scope_2': 'scope_0'
        }):
            with scope.NameScope('scope_0'):
                fc1_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    weight_optim=None,  # it will use adagrad_optim_2
                )
            with scope.NameScope('scope_1'), self.assertRaises(Exception):
                fc2_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    weight_optim=adagrad_optim,
                )
            with scope.NameScope('scope_2'), self.assertRaises(Exception):
                fc2_output = self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    weight_optim=adam_optim,
                )
def build_adjoint_mlp(
    model,
    input_dim=1,
    hidden_dims=[5, 5],
    output_dim=1,
    optim=None,
):
    '''
    Precondition:
        model.input_feature_schema.origin_input has shape of (input_dim, )
        model.input_feature_schema.adjoint_input has shape of (output_dim, )
    Note:
        adjoint_input is a binary array, e.g. [1, 0], which is used as the
        "selector".
    '''
    assert len(hidden_dims) >= 1, "at least one hidden dim"
    with ParameterSharing({'origin': 'adjoint'}):
        z = model.input_feature_schema.origin_input
        z_lst = []
        idx = 0
        with scope.NameScope('origin'):
            for hidden_dim in hidden_dims:
                gamma = model.FC(z, hidden_dim,
                                 weight_optim=optim,
                                 bias_optim=optim,
                                 name='fc{}'.format(idx))
                z = model.Sigmoid(gamma, 'sig{}'.format(idx))
                z_lst.append(z)
                idx += 1
            # Output layer: no grad for the bias in this layer,
            # use FCWithoutBias
            origin_pred = model.FCWithoutBias(z, output_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))
            origin_pred = model.NanCheck(origin_pred, 'origin_pred')

        with scope.NameScope('adjoint'):
            # with Tags(Tags.EXCLUDE_FROM_PREDICTION):
            alpha = model.input_feature_schema.adjoint_input
            for hidden_dim in reversed(hidden_dims):
                gamma_ad = model.FCTransposeW(alpha, hidden_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))
                z = z_lst[idx - 1]
                # Note: passing gradient is helpful
                # z = model.StopGradient(z, z)
                # TODO: use add_global_constant
                one_vector = model.ConstantFill([z],
                                                'ones{}'.format(idx),
                                                value=1.0,
                                                dtype=core.DataType.FLOAT)
                multiplier = model.Mul(
                    [z, model.Sub([one_vector, z], 'sub{}'.format(idx))],
                    'multiplier{}'.format(idx),
                )
                alpha = model.Mul([gamma_ad, multiplier],
                                  'adjoint_layer{}'.format(idx))
                idx -= 1
            adjoint_pred = model.FCTransposeW(alpha, input_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))

    # Add loss
    model.trainer_extra_schema.prediction.set_value(adjoint_pred.get(),
                                                    unsafe=True)
    loss = model.BatchDirectMSELoss(model.trainer_extra_schema)
    model.add_loss(loss)

    # Set output
    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.adjoint_pred.set_value(adjoint_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)

    return origin_pred, adjoint_pred, loss
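# Why the backward ("adjoint") sweep above multiplies by z * (1 - z): for
# z = Sigmoid(gamma), dz/dgamma = z * (1 - z), so each adjoint layer applies
# the transposed weights (FCTransposeW) followed by this elementwise factor.
# The check below is a standalone numpy sanity check of that identity only;
# it does not touch any of the model-building APIs.
import numpy as np


def _check_sigmoid_derivative_identity():
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    x = np.linspace(-3.0, 3.0, 7)
    z = sigmoid(x)
    analytic = z * (1.0 - z)
    numeric = (sigmoid(x + 1e-5) - sigmoid(x - 1e-5)) / 2e-5
    assert np.allclose(analytic, numeric, atol=1e-6)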
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
):
    '''
    sig_net_dim and tanh_net_dim are the lists of dimensions for each hidden
    layer in the sig_net and tanh_net respectively.
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'
    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')

        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # for the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

    # Add loss
    if train_target == TrainTarget.ADJOINT:
        model.trainer_extra_schema.sig_loss_record.prediction.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
        # assumes the inputs are 1D vectors
        sig_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.sig_loss_record)
        tanh_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.tanh_loss_record)
        adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
        model.add_loss(sig_loss)
        model.add_loss(tanh_loss)
        # Set output
        model.output_schema.sig_adjoint_pred.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.output_schema.tanh_adjoint_pred.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        loss = adjoint_loss
    elif train_target == TrainTarget.ORIGIN:
        model.trainer_extra_schema.origin_loss_record.prediction.set_value(
            origin_pred.get(), unsafe=True)
        # Add L1 Loss
        assert max_loss_scale > 1, 'max loss scale must > 1'
        loss_and_metrics = model.BatchDirectWeightedL1Loss(
            model.trainer_extra_schema.origin_loss_record,
            max_scale=max_loss_scale,
        )
        # Add metric
        model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
        model.add_metric_field('scaled_l1_metric',
                               loss_and_metrics.scaled_l1_metric)
        if loss_function == 'scaled_l2':
            print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
            loss_and_metrics = model.BatchDirectWeightedL2Loss(
                model.trainer_extra_schema,
                max_scale=max_loss_scale,
            )
        model.add_loss(loss_and_metrics.loss)
        loss = loss_and_metrics.loss
    else:
        raise Exception('train target: ' + train_target + ' not implemented')

    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)
    return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
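# Hedged reading of the 'beta' / 'alpha' seeds above, assuming the origin
# network ends with origin_pred = sig_h * tanh_h where sig_h = Sigmoid(u) and
# tanh_h = Tanh(v) for some pre-activations u, v (build_origin_block is not
# shown here, so this is an interpretation, not a statement about its code):
#   d origin_pred / du = tanh_h * sig_h * (1 - sig_h)   -> 'beta'
#   d origin_pred / dv = sig_h  * (1 - tanh_h ** 2)     -> 'alpha'
# i.e. the product rule combined with sigmoid' = s(1-s) and tanh' = 1 - t^2;
# the extra 'inter' FCTransposeW term would carry beta's contribution across
# the two branches. Standalone numpy check of the two derivative identities:
import numpy as np


def _check_output_block_derivatives():
    u, v = 0.3, -1.2
    s, t = 1.0 / (1.0 + np.exp(-u)), np.tanh(v)
    eps = 1e-6
    num_du = ((1.0 / (1.0 + np.exp(-(u + eps)))) * t - s * t) / eps
    num_dv = (s * np.tanh(v + eps) - s * t) / eps
    assert np.isclose(num_du, t * s * (1.0 - s), atol=1e-4)
    assert np.isclose(num_dv, s * (1.0 - t ** 2), atol=1e-4)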
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag='no_tag',
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
    neg_grad_penalty=None,
):
    '''
    sig_net_dim and tanh_net_dim are the lists of dimensions for each hidden
    layer in the sig_net and tanh_net respectively.

    neg_grad_penalty['input_type']: which input type (sig/tanh)
    neg_grad_penalty['input_idx']: which input dim to apply the negative
        gradient penalty to
    neg_grad_penalty['magnitude']: the magnitude of the penalty
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'
    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')

        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # for the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

    # Add loss
    if train_target == TrainTarget.ADJOINT:
        model.trainer_extra_schema.sig_loss_record.prediction.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
        # assumes the inputs are 1D vectors
        sig_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.sig_loss_record)
        tanh_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.tanh_loss_record)
        adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
        model.add_loss(sig_loss)
        model.add_loss(tanh_loss)
        # Set output
        model.output_schema.sig_adjoint_pred.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.output_schema.tanh_adjoint_pred.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        loss = adjoint_loss
    elif train_target == TrainTarget.ORIGIN:
        model.trainer_extra_schema.origin_loss_record.prediction.set_value(
            origin_pred.get(), unsafe=True)
        # Add L1 Loss
        assert max_loss_scale > 1, 'max loss scale must > 1'
        loss_and_metrics = model.BatchDirectWeightedL1Loss(
            model.trainer_extra_schema.origin_loss_record,
            max_scale=max_loss_scale,
        )
        # Add metric
        model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
        model.add_metric_field('scaled_l1_metric',
                               loss_and_metrics.scaled_l1_metric)

        # Add negative gradient penalty
        ## TODO: Put them in a layer
        if neg_grad_penalty:
            # TODO: make neg_grad_penalty an object
            with Tags(Tags.EXCLUDE_FROM_PREDICTION):
                assert isinstance(neg_grad_penalty['input_idx'], list)
                assert isinstance(neg_grad_penalty['magnitude'], float)
                gather_indices = model.add_global_constant(
                    'neg_grad_penalty_input_idx',
                    neg_grad_penalty['input_idx'],
                    dtype=np.int32)
                penalty_scaler = model.add_global_constant(
                    'penalty_scaler',
                    neg_grad_penalty['magnitude'],
                    dtype=np.float32)
                if neg_grad_penalty['input_type'] == 'tanh':
                    gathered_adjoint_pred = model.BatchGather(
                        [tanh_adjoint_pred, gather_indices],
                        'gathered_adjoint_pred',
                        output_dtypes=(
                            np.float32,
                            (len(neg_grad_penalty['input_idx']), )))
                    origin_input_gate = model.BatchGather(
                        [model.input_feature_schema.tanh_input,
                         gather_indices],
                        'origin_input_gate',
                        output_dtypes=(
                            np.float32,
                            (len(neg_grad_penalty['input_idx']), )))
                elif neg_grad_penalty['input_type'] == 'sig':
                    gathered_adjoint_pred = model.BatchGather(
                        [sig_adjoint_pred, gather_indices],
                        'gathered_adjoint_pred')
                    origin_input_gate = model.BatchGather(
                        [model.input_feature_schema.sig_input,
                         gather_indices],
                        'origin_input_gate')
                else:
                    raise Exception(
                        'Wrong neg_grad_penalty[\'input_type\']')

                ## TODO: Put them in an operator
                neg_gradients = model.Relu([
                    model.Negative([
                        model.FlattenToVec([gathered_adjoint_pred],
                                           'flat_gathered_adjoint_pred')
                    ], 'neg_gathered_adjoint_pred')
                ], 'neg_gradients')
                input_gate = model.Relu([
                    model.Sign([
                        model.FlattenToVec([origin_input_gate],
                                           'flat_origin_input_gate')
                    ], 'sign_origin_input_gate')
                ], 'input_gate')
                input_gate_stopgrad = model.StopGradient(
                    [input_gate], 'input_gate_stopgrad')
                scaled_neg_gradient_loss = model.Mul(
                    [
                        model.AveragedLoss([
                            model.Mul([neg_gradients, input_gate_stopgrad],
                                      'gated_neg_gradients')
                        ], 'avg_gated_neg_graident_loss'),
                        penalty_scaler
                    ],
                    'scaled_neg_gradient_loss',
                    name='PenaltyScaler')
                model.add_metric_field('neg_gradient_loss',
                                       scaled_neg_gradient_loss)
                model.add_loss(scaled_neg_gradient_loss)

        if loss_function == 'scaled_l2':
            print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
            loss_and_metrics = model.BatchDirectWeightedL2Loss(
                model.trainer_extra_schema.origin_loss_record,
                max_scale=max_loss_scale,
            )
        model.add_loss(loss_and_metrics.loss)
        loss = loss_and_metrics.loss
    else:
        raise Exception('train target: ' + train_target + ' not implemented')

    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)
    return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
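# Hedged reading of the neg_grad_penalty block above: for each selected input
# column j it adds  magnitude * mean( relu(-dy/dx_j) * 1[x_j > 0] )  to the
# loss, i.e. negative sensitivities are penalized (pushing the model toward
# monotonically non-decreasing behavior in those inputs), and only where the
# gating input is positive; StopGradient on the gate means only the adjoint
# prediction receives gradient from the penalty. The numbers and helper below
# are hypothetical, standalone numpy illustrations only.
import numpy as np


def _neg_grad_penalty_example():
    adjoint_pred = np.array([0.5, -0.2, -0.7])  # hypothetical dy/dx_j per example
    gated_input = np.array([1.0, 2.0, -3.0])    # hypothetical input column x_j
    magnitude = 0.1
    gate = np.maximum(np.sign(gated_input), 0.0)   # Relu(Sign(x))
    neg_grad = np.maximum(-adjoint_pred, 0.0)      # Relu(-dy/dx)
    penalty = magnitude * np.mean(neg_grad * gate)
    # Only the second entry contributes: 0.1 * mean([0.0, 0.2, 0.0])
    return penalty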