    def test_parameter_sharing_nested_scopes(self):
        # Test parameter sharing
        with scope.NameScope('global_scope'):
            with ParameterSharing({'model_b': 'model_a'}):
                param_global = parameter_sharing_context.get_parameter_name(
                    'w')
                self.assertEquals(param_global, 'global_scope/w')
                # This scope is overridden to match 'model_a'
                with scope.NameScope('model_b'):
                    with ParameterSharing({'shared_scope': ''}):
                        param_4 = parameter_sharing_context.get_parameter_name(
                            'w')
                        self.assertEquals(param_4, 'global_scope/model_a/w')
                        with scope.NameScope('shared_scope'):
                            param_5 = parameter_sharing_context.\
                                get_parameter_name('w')
                            self.assertEquals(param_5,
                                              'global_scope/model_a/w')
                # This scope is not supposed to have any sharing
                with scope.NameScope('model_c'):
                    with ParameterSharing({'shared_scope': ''}):
                        param_4 = parameter_sharing_context.get_parameter_name(
                            'w')
                        self.assertEquals(param_4, 'global_scope/model_c/w')
                        with scope.NameScope('shared_scope'):
                            param_5 = parameter_sharing_context.\
                                get_parameter_name('w')
                            self.assertEquals(param_5,
                                              'global_scope/model_c/w')
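
The snippets in this listing are Caffe2 test and model-building code. As a quick orientation, here is a minimal standalone sketch (assuming a working Caffe2 install; the 'root', 'a', 'b' scope names are made up) of the scope remapping the test above exercises, together with the imports these snippets rely on:

# Imports assumed by the test snippets in this listing (Caffe2):
from caffe2.python import scope
from caffe2.python.modeling.parameter_sharing import (
    ParameterSharing,
    parameter_sharing_context,
)
# Other snippets in this listing also use, e.g.:
#   from caffe2.python import brew, core, model_helper, workspace
#   from caffe2.python.modeling.initializers import Initializer

# ParameterSharing({'b': 'a'}) remaps the child scope 'b' to 'a', so parameter
# names requested under 'root/b' resolve into 'root/a'.
with scope.NameScope('root'):
    with ParameterSharing({'b': 'a'}):
        with scope.NameScope('b'):
            print(parameter_sharing_context.get_parameter_name('w'))  # root/a/w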
Example #2
    def test_deep_hierarchy(self):
        model = model_helper.ModelHelper(name="test")
        with ParameterSharing({'a': 'b'}):
            with scope.NameScope('a'):
                with ParameterSharing({'c': 'd'}):
                    with scope.NameScope('c'):
                        with ParameterSharing({'e': 'f'}):
                            with scope.NameScope('e'):
                                p = model.create_param(
                                    'w',
                                    shape=[2],
                                    initializer=Initializer("ConstantFill"))
        self.assertNotEqual(model.get_param_info(p), None)
    def test_parameter_sharing_brew(self):
        # Test default scopes, with no sharing
        model = model_helper.ModelHelper(name="test")
        data = model.net.AddExternalInput("data")
        fc1 = brew.fc(model, data, "fc1", dim_in=16, dim_out=16)
        # Shared params are expected to have the same shape; creating one with
        # a different shape fails
        with self.assertRaises(AssertionError):
            _ = brew.fc(model, data, "fc1", dim_in=2, dim_out=2)  # noqa

        output_blobs = set()
        with scope.NameScope('some_global_scope'):
            with scope.NameScope('model_a'):
                output_blobs.add(str(brew.fc(model, fc1, 'output', 16, 16)))
            with ParameterSharing({'model_b': 'model_a'}),\
                    scope.NameScope('model_b'):
                with ParameterSharing({'shared_1': '', 'shared_2': ''}):
                    # All params in the FC layers from shared_1, shared_2 and
                    # model_a are shared and will point to:
                    # [some_global_scope/model_a/output_w,
                    #  some_global_scope/model_a/output_b]
                    with scope.NameScope('shared_1'):
                        output_blobs.add(
                            str(brew.fc(model, fc1, 'output', 16, 16)))
                    with scope.NameScope('shared_2'):
                        output_blobs.add(
                            str(brew.fc(model, fc1, 'output', 16, 16)))
                    # Params of this layer are not shared with anyone unless
                    # there is some explicit sharing with model_a/unshared (not
                    # in this example).
                    # Names of the blobs are
                    # [some_global_scope/model_a/unshared/output_w,
                    #  some_global_scope/model_a/unshared/output_b]
                    with scope.NameScope('unshared'):
                        output_blobs.add(
                            str(brew.fc(model, fc1, 'output', 16, 16)))

        self.assertEqual(len(model._parameters_info), 6)
        self.assertEqual(len(output_blobs), 4)
        self.assertEqual(sorted(model._parameters_info.keys()), [
            'fc1_b',
            'fc1_w',
            'some_global_scope/model_a/output_b',
            'some_global_scope/model_a/output_w',
            'some_global_scope/model_a/unshared/output_b',
            'some_global_scope/model_a/unshared/output_w',
        ])
        model.Validate()
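
A minimal sketch (not part of the original test) of how the brew-built model above could be executed. The blob names come from the snippet; the batch size, the random input, and the workspace calls are standard Caffe2 usage chosen for illustration:

import numpy as np
from caffe2.python import workspace

# 'data' is the external input declared above; fc1 expects dim_in=16.
workspace.FeedBlob("data", np.random.rand(4, 16).astype(np.float32))
# The shared output_w / output_b blobs are filled exactly once here.
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
# Each scope still produces its own output blob, computed from the shared weights.
print(workspace.FetchBlob("some_global_scope/model_a/output").shape)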
    def test_layer_duplicated_parameter_init(self):
        output_dims = 2
        with scope.NameScope('global_scope'):
            with ParameterSharing({'new_fc': 'shared_fc'}):
                self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    name='shared_fc'
                )
                self.model.FC(
                    self.model.input_feature_schema.float_features,
                    output_dims,
                    name='new_fc'
                )

        train_init_net = core.Net('train_init_net')
        train_net = core.Net('train_net')
        for layer in self.model.layers:
            layer.add_operators(train_net, train_init_net)
        op_outputs = []
        for op in train_init_net._net.op:
            op_outputs.extend(op.output)

        # only fill these parameter blobs once
        self.assertEquals(
            sorted(op_outputs),
            ['global_scope/shared_fc/b', 'global_scope/shared_fc/w']
        )
Example #5
    def apply_over_sequence(
        self,
        model,
        inputs,
        seq_lengths,
        initial_states,
        outputs_with_grads=None,
    ):
        inputs = self.cell.prepare_input(model, inputs)

        # Now they are blob references - outputs of splitting the input sequence
        split_inputs = model.net.Split(
            inputs,
            [str(inputs) + "_timestep_{}".format(i) for i in range(self.T)],
            axis=0)
        if self.T == 1:
            split_inputs = [split_inputs]

        states = initial_states
        all_states = []
        for t in range(0, self.T):
            scope_name = "timestep_{}".format(t)
            # Parameters of all timesteps are shared
            with ParameterSharing({scope_name: ''}),\
                 scope.NameScope(scope_name):
                timestep = model.param_init_net.ConstantFill(
                    [],
                    "timestep",
                    value=t,
                    shape=[1],
                    dtype=core.DataType.INT32,
                    device_option=core.DeviceOption(caffe2_pb2.CPU))
                states = self.cell._apply(
                    model=model,
                    input_t=split_inputs[t],
                    seq_lengths=seq_lengths,
                    states=states,
                    timestep=timestep,
                )
            all_states.append(states)

        all_states = zip(*all_states)
        all_states = [
            model.net.Concat(list(full_output), [
                str(full_output[0])[len("timestep_0/"):] + "_concat",
                str(full_output[0])[len("timestep_0/"):] + "_concat_info"
            ],
                             axis=0)[0] for full_output in all_states
        ]
        outputs = tuple(
            six.next(it)
            for it in itertools.cycle([iter(all_states),
                                       iter(states)]))
        outputs_without_grad = set(range(
            len(outputs))) - set(outputs_with_grads or [])
        for i in outputs_without_grad:
            model.net.ZeroGradient(outputs[i], [])
        logging.debug("Added 0 gradients for blobs: %s",
                      [outputs[i] for i in outputs_without_grad])
        return None, outputs
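
The per-timestep loop above relies on mapping each 'timestep_{t}' scope to the empty scope, so every timestep resolves its cell parameters to the same blobs. A tiny standalone sketch (assuming Caffe2; the parameter name 'gates_w' is only an illustration):

from caffe2.python import scope
from caffe2.python.modeling.parameter_sharing import (
    ParameterSharing,
    parameter_sharing_context,
)

for t in range(3):
    scope_name = "timestep_{}".format(t)
    with ParameterSharing({scope_name: ''}), scope.NameScope(scope_name):
        # Prints 'gates_w' for every t: the timestep scope is stripped away,
        # so all timesteps share one parameter set.
        print(parameter_sharing_context.get_parameter_name('gates_w'))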
    def test_layer_shared_parameter_name_within_same_namescope(self):
        output_dims = 2
        with scope.NameScope('global_scope'):
            with ParameterSharing({'fc_auto_0': 'fc'}):
                self.model.FC(self.model.input_feature_schema.float_features,
                              output_dims)
                self.assertEquals(self.model.layers[-1].w, 'global_scope/fc/w')

                self.model.FC(self.model.input_feature_schema.float_features,
                              output_dims)
                self.assertEquals(self.model.layers[-1].w, 'global_scope/fc/w')
    def test_layer_shared_parameter_name_different_shapes(self):
        output_dims = 2
        with scope.NameScope('global_scope'):
            with ParameterSharing({'fc_auto_0': 'fc'}):
                self.model.FC(self.model.input_feature_schema.float_features,
                              output_dims)
                self.assertEquals(self.model.layers[-1].w, 'global_scope/fc/w')

                with six.assertRaisesRegex(self, ValueError,
                                           'Got inconsistent shapes .*'):
                    self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims + 1)
    def test_parameter_sharing_subscopes(self):
        # Sharing only one of the subscopes
        with ParameterSharing({'global_scope/b': 'global_scope/a'}):
            with scope.NameScope('global_scope'):
                param_6 = parameter_sharing_context.get_parameter_name('w')
                self.assertEquals(param_6, 'global_scope/w')
                with scope.NameScope('a'):
                    param_7 = parameter_sharing_context.get_parameter_name('w')
                    self.assertEquals(param_7, 'global_scope/a/w')
                with scope.NameScope('b'):
                    param_8 = parameter_sharing_context.get_parameter_name('w')
                    self.assertEquals(param_8, 'global_scope/a/w')
                with scope.NameScope('c'):
                    param_9 = parameter_sharing_context.get_parameter_name('w')
                    self.assertEquals(param_9, 'global_scope/c/w')
    def test_layer_shared_parameter_name_within_same_namescope_customized_name(
            self):
        output_dims = 2
        with scope.NameScope('global_scope'):
            with ParameterSharing({'new_fc': 'shared_fc'}):
                self.model.FC(self.model.input_feature_schema.float_features,
                              output_dims,
                              name='shared_fc')
                self.assertEquals(self.model.layers[-1].w,
                                  'global_scope/shared_fc/w')

                self.model.FC(self.model.input_feature_schema.float_features,
                              output_dims,
                              name='new_fc')
                self.assertEquals(self.model.layers[-1].w,
                                  'global_scope/shared_fc/w')
    def test_layer_shared_parameter_name_different_namescopes(self):
        output_dims = 2
        with scope.NameScope('global_scope'):
            with ParameterSharing({'scope_1': 'scope_0'}):
                with scope.NameScope('scope_0'):
                    fc1_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims)
                    self.assertEquals(self.model.layers[-1].w,
                                      'global_scope/scope_0/fc/w')
                    self.assertEquals(fc1_output(),
                                      'global_scope/scope_0/fc/output')

                with scope.NameScope('scope_1'):
                    fc2_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims)
                    self.assertEquals(self.model.layers[-1].w,
                                      'global_scope/scope_0/fc/w')
                    self.assertEquals(fc2_output(),
                                      'global_scope/scope_1/fc/output')
    def test_layer_shared_parameter_optim_validator(self):
        """
        This test is to cover the _validate_param_optim function in
        layer_model_helper class.
        """

        output_dims = 2

        adagrad_optim = AdagradOptimizer(
            alpha=0.004,
            epsilon=0.02,
        )

        self.model.default_optimizer = adagrad_optim

        # the following covers the branch -- optim is None
        with scope.NameScope('global_scope_0'):
            with ParameterSharing({'scope_1': 'scope_0'}):
                with scope.NameScope('scope_0'):
                    fc1_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=self.model.NoOptim,
                    )

                with scope.NameScope('scope_1'), self.assertRaises(Exception):
                    fc2_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims)

        # the following covers the branch -- optim is NoOptim
        with scope.NameScope('global_scope_1'):
            with ParameterSharing({'scope_1': 'scope_0'}):
                with scope.NameScope('scope_0'):
                    fc1_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=None,
                    )

                with scope.NameScope('scope_1'), self.assertRaises(Exception):
                    fc2_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=self.model.NoOptim,
                    )

        # the following covers the branch -- optim is an instance of Optimizer
        adagrad_optim_2 = AdagradOptimizer(
            alpha=0.005,
            epsilon=0.02,
        )

        adam_optim = AdamOptimizer()

        self.model.default_optimizer = adagrad_optim_2

        with scope.NameScope('global_scope_2'):
            with ParameterSharing({
                    'scope_1': 'scope_0',
                    'scope_2': 'scope_0'
            }):
                with scope.NameScope('scope_0'):
                    fc1_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=None,  # it will use adagrad_optim_2
                    )

                with scope.NameScope('scope_1'), self.assertRaises(Exception):
                    fc2_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=adagrad_optim,
                    )

                with scope.NameScope('scope_2'), self.assertRaises(Exception):
                    fc2_output = self.model.FC(
                        self.model.input_feature_schema.float_features,
                        output_dims,
                        weight_optim=adam_optim,
                    )
Example #12
def build_adjoint_mlp(
    model,
    input_dim=1,
    hidden_dims=[5, 5],
    output_dim=1,
    optim=None,
):
    ''' Precondition:
            model.input_feature_schema.origin_input has shape (input_dim, )
            model.input_feature_schema.adjoint_input has shape (output_dim, )
        Note:
            adjoint_input is a binary array, e.g. [1, 0], which is used as the
            "selector".
    '''
    assert len(hidden_dims) >= 1, "at least one hidden dim"
    with ParameterSharing({'origin': 'adjoint'}):
        z = model.input_feature_schema.origin_input
        z_lst = []
        idx = 0
        with scope.NameScope('origin'):
            for hidden_dim in hidden_dims:
                gamma = model.FC(z,
                                 hidden_dim,
                                 weight_optim=optim,
                                 bias_optim=optim,
                                 name='fc{}'.format(idx))
                z = model.Sigmoid(gamma, 'sig{}'.format(idx))
                z_lst.append(z)
                idx += 1
            # Output layer: no grad for the bias in this layer,
            # use FCWithoutBias
            origin_pred = model.FCWithoutBias(z,
                                              output_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))
            origin_pred = model.NanCheck(origin_pred, 'origin_pred')

        with scope.NameScope('adjoint'):
            # with Tags(Tags.EXCLUDE_FROM_PREDICTION):
            alpha = model.input_feature_schema.adjoint_input
            for hidden_dim in reversed(hidden_dims):
                gamma_ad = model.FCTransposeW(alpha,
                                              hidden_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))
                z = z_lst[idx - 1]
                # Note: passing gradient is helpful
                # z = model.StopGradient(z, z)
                # TODO: use add_global_constant
                one_vector = model.ConstantFill([z],
                                                'ones{}'.format(idx),
                                                value=1.0,
                                                dtype=core.DataType.FLOAT)
                multiplier = model.Mul(
                    [z, model.Sub([one_vector, z], 'sub{}'.format(idx))],
                    'multiplier{}'.format(idx),
                )
                alpha = model.Mul([gamma_ad, multiplier],
                                  'adjoint_layer{}'.format(idx))
                idx -= 1
            adjoint_pred = model.FCTransposeW(alpha,
                                              input_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))
    # Add loss
    model.trainer_extra_schema.prediction.set_value(adjoint_pred.get(),
                                                    unsafe=True)
    loss = model.BatchDirectMSELoss(model.trainer_extra_schema)
    model.add_loss(loss)
    # Set output
    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.adjoint_pred.set_value(adjoint_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)

    return origin_pred, adjoint_pred, loss
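
In build_adjoint_mlp, ParameterSharing({'origin': 'adjoint'}) remaps the 'origin' scope onto 'adjoint', so the forward fc{k} layers and the adjoint FCTransposeW layers with the same name resolve to one set of weight blobs. The adjoint pass then computes the input gradient of the forward sigmoid MLP using those tied, transposed weights. A plain-NumPy sketch of that computation (illustration only; FCWithoutBias, FCTransposeW, NanCheck are custom layers from the source repo, and the shapes below are made up):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
input_dim, hidden_dims, output_dim = 1, [5, 5], 1
Ws = [rng.standard_normal((n_out, n_in))
      for n_in, n_out in zip([input_dim] + hidden_dims, hidden_dims)]
bs = [rng.standard_normal(n) for n in hidden_dims]
W_out = rng.standard_normal((output_dim, hidden_dims[-1]))

# Forward ('origin') pass: fc{k} + Sigmoid, then the bias-free output layer.
x = rng.standard_normal(input_dim)
zs, z = [], x
for W, b in zip(Ws, bs):
    z = sigmoid(W @ z + b)
    zs.append(z)
y = W_out @ z

# Adjoint pass: alpha starts as the binary output selector (adjoint_input);
# each step is (W_k^T alpha) * z_{k-1} * (1 - z_{k-1}), i.e. the FCTransposeW
# layer followed by the Mul with the sigmoid derivative.
alpha = np.array([1.0])
alpha = (W_out.T @ alpha) * zs[-1] * (1.0 - zs[-1])
for W, z_prev in zip(reversed(Ws[1:]), reversed(zs[:-1])):
    alpha = (W.T @ alpha) * z_prev * (1.0 - z_prev)
adjoint_pred = Ws[0].T @ alpha   # equals dy/dx for the selected output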
Example #13
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
):
    '''
        sig_net_dim and tanh_net_dim are the lists of dimensions for the hidden
        layers in sig_net and tanh_net respectively.
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'

    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')
        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # For the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

        # Add loss
        if train_target == TrainTarget.ADJOINT:
            model.trainer_extra_schema.sig_loss_record.prediction.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
            # assumes the inputs are 1D vectors
            sig_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.sig_loss_record)
            tanh_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.tanh_loss_record)
            adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
            model.add_loss(sig_loss)
            model.add_loss(tanh_loss)
            # Set output
            model.output_schema.sig_adjoint_pred.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.output_schema.tanh_adjoint_pred.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            loss = adjoint_loss
        elif train_target == TrainTarget.ORIGIN:
            model.trainer_extra_schema.origin_loss_record.prediction.set_value(
                origin_pred.get(), unsafe=True)
            # Add L1 Loss
            assert max_loss_scale > 1, 'max loss scale must be > 1'
            loss_and_metrics = model.BatchDirectWeightedL1Loss(
                model.trainer_extra_schema.origin_loss_record,
                max_scale=max_loss_scale,
            )
            # Add metric
            model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
            model.add_metric_field('scaled_l1_metric',
                                   loss_and_metrics.scaled_l1_metric)
            if loss_function == 'scaled_l2':
                print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
                loss_and_metrics = model.BatchDirectWeightedL2Loss(
                    model.trainer_extra_schema.origin_loss_record,
                    max_scale=max_loss_scale,
                )
            model.add_loss(loss_and_metrics.loss)
            loss = loss_and_metrics.loss
        else:
            raise Exception('train target: ' + train_target +
                            ' not implemented')

        model.output_schema.origin_pred.set_value(origin_pred.get(),
                                                  unsafe=True)
        model.output_schema.loss.set_value(loss.get(), unsafe=True)

        return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
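
The output-block "seeds" built above follow from differentiating the product output of the origin net: for y = sigmoid(u) * tanh(v), dy/du = tanh(v)*s*(1-s) and dy/dv = s*(1-tanh(v)^2) with s = sigmoid(u), which is what the beta and alpha blobs hold (alpha additionally picks up the inter_embed_layer correction for the cross-branch embedding inside build_origin_block, which is not reproduced here). A numerical illustration in plain NumPy, assuming the two branches end in Sigmoid and Tanh activations:

import numpy as np

u, v = 0.3, -0.7                     # hypothetical pre-activations
s, t = 1.0 / (1.0 + np.exp(-u)), np.tanh(v)
y = s * t                            # origin_pred = Mul([sig_h, tanh_h])
beta = t * s * (1.0 - s)             # dy/du -> the 'sig_output_beta_*' blob
alpha = s * (1.0 - t * t)            # dy/dv -> the 'tanh_output_mul_*' blob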
Example #14
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag='no_tag',
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
    neg_grad_penalty=None,
):
    '''
        sig_net_dim and tanh_net_dim are the lists of dimensions for the hidden
        layers in sig_net and tanh_net respectively.

        neg_grad_penalty['input_type']: which input type (sig/tanh) to penalize
        neg_grad_penalty['input_idx']: which input dims to apply the negative
            gradient penalty to
        neg_grad_penalty['magnitude']: the magnitude of the penalty
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'

    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')
        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # For the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

        # Add loss
        if train_target == TrainTarget.ADJOINT:
            model.trainer_extra_schema.sig_loss_record.prediction.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
            # assumes the inputs are 1D vectors
            sig_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.sig_loss_record)
            tanh_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.tanh_loss_record)
            adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
            model.add_loss(sig_loss)
            model.add_loss(tanh_loss)
            # Set output
            model.output_schema.sig_adjoint_pred.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.output_schema.tanh_adjoint_pred.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            loss = adjoint_loss
        elif train_target == TrainTarget.ORIGIN:
            model.trainer_extra_schema.origin_loss_record.prediction.set_value(
                origin_pred.get(), unsafe=True)
            # Add L1 Loss
            assert max_loss_scale > 1, 'max loss scale must be > 1'
            loss_and_metrics = model.BatchDirectWeightedL1Loss(
                model.trainer_extra_schema.origin_loss_record,
                max_scale=max_loss_scale,
            )
            # Add metric
            model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
            model.add_metric_field('scaled_l1_metric',
                                   loss_and_metrics.scaled_l1_metric)

            # Add negative gradient penalty
            ## TODO: Put them in a layer
            if neg_grad_penalty:
                # TODO: make neg_grad_penalty to a object
                with Tags(Tags.EXCLUDE_FROM_PREDICTION):
                    assert isinstance(neg_grad_penalty['input_idx'], list)
                    assert isinstance(neg_grad_penalty['magnitude'], float)
                    gather_indices = model.add_global_constant(
                        'neg_grad_penalty_input_idx',
                        neg_grad_penalty['input_idx'],
                        dtype=np.int32)
                    penalty_scaler = model.add_global_constant(
                        'penalty_scaler',
                        neg_grad_penalty['magnitude'],
                        dtype=np.float32)
                    if neg_grad_penalty['input_type'] == 'tanh':
                        gathered_adjoint_pred = model.BatchGather(
                            [tanh_adjoint_pred, gather_indices],
                            'gathered_adjoint_pred',
                            output_dtypes=(np.float32, (len(
                                neg_grad_penalty['input_idx']), )))
                        origin_input_gate = model.BatchGather(
                            [
                                model.input_feature_schema.tanh_input,
                                gather_indices
                            ],
                            'origin_input_gate',
                            output_dtypes=(np.float32, (len(
                                neg_grad_penalty['input_idx']), )))

                    elif neg_grad_penalty['input_type'] == 'sig':
                        gathered_adjoint_pred = model.BatchGather(
                            [sig_adjoint_pred, gather_indices],
                            'gathered_adjoint_pred')
                        origin_input_gate = model.BatchGather([
                            model.input_feature_schema.sig_input,
                            gather_indices
                        ], 'origin_input_gate')
                    else:
                        raise Exception(
                            'Wrong neg_grad_penalty[\'input_type\']')

                    ## TODO: Put them in an operator
                    neg_gradients = model.Relu([
                        model.Negative([
                            model.FlattenToVec([gathered_adjoint_pred],
                                               'flat_gathered_adjoint_pred')
                        ], 'neg_gathered_adjoint_pred')
                    ], 'neg_gradients')
                    input_gate = model.Relu([
                        model.Sign([
                            model.FlattenToVec([origin_input_gate],
                                               'flat_origin_input_gate')
                        ], 'sign_origin_input_gate')
                    ], 'input_gate')
                    input_gate_stopgrad = model.StopGradient(
                        [input_gate], 'input_gate_stopgrad')
                    scaled_neg_gradient_loss = model.Mul(
                        [
                            model.AveragedLoss([
                                model.Mul([neg_gradients, input_gate_stopgrad],
                                          'gated_neg_gradients')
                            ], 'avg_gated_neg_graident_loss'), penalty_scaler
                        ],
                        'scaled_neg_gradient_loss',
                        name='PenaltyScaler')
                    model.add_metric_field('neg_gradient_loss',
                                           scaled_neg_gradient_loss)
                    model.add_loss(scaled_neg_gradient_loss)

            if loss_function == 'scaled_l2':
                print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
                loss_and_metrics = model.BatchDirectWeightedL2Loss(
                    model.trainer_extra_schema.origin_loss_record,
                    max_scale=max_loss_scale,
                )
            model.add_loss(loss_and_metrics.loss)
            loss = loss_and_metrics.loss
        else:
            raise Exception('train target: ' + train_target +
                            ' not implemented')

        model.output_schema.origin_pred.set_value(origin_pred.get(),
                                                  unsafe=True)
        model.output_schema.loss.set_value(loss.get(), unsafe=True)

        return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
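
For reference, a plain-NumPy illustration (not the original layers; all values below are made up) of the neg_grad_penalty term assembled above: it averages the ReLU of the negated adjoint predictions on the selected input dimensions, gates that by whether the corresponding origin input is positive, and scales by the penalty magnitude:

import numpy as np

adjoint_pred = np.array([[0.3, -0.2], [-0.1, 0.4]], dtype=np.float32)  # hypothetical
origin_input = np.array([[1.5, 0.0], [2.0, 3.0]], dtype=np.float32)    # hypothetical
input_idx = [0, 1]
magnitude = 0.1

gathered = adjoint_pred[:, input_idx].ravel()        # BatchGather + FlattenToVec
neg_gradients = np.maximum(-gathered, 0.0)           # Relu(Negative(...))
input_gate = np.maximum(                             # Relu(Sign(...)), treated as constant
    np.sign(origin_input[:, input_idx].ravel()), 0.0)
penalty = magnitude * np.mean(neg_gradients * input_gate)  # AveragedLoss * penalty_scaler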