Example #1
    def testSelectRecordByContext(self):
        float_features = self.model.input_feature_schema.float_features

        float_array = np.array([1.0, 2.0], dtype=np.float32)

        schema.FeedRecord(float_features, [float_array])

        with Tags(Tags.EXCLUDE_FROM_PREDICTION):
            log_float_features = self.model.Log(float_features, 1)
        joined = self.model.SelectRecordByContext(
            schema.Struct(
                (InstantiationContext.PREDICTION, float_features),
                (InstantiationContext.TRAINING, log_float_features),
                # TODO: TRAIN_ONLY layers are also generated in eval
                (InstantiationContext.EVAL, log_float_features),
            ))

        # model.output_schema has to be a struct
        self.model.output_schema = schema.Struct(('joined', joined))
        predict_net = layer_model_instantiator.generate_predict_net(self.model)
        workspace.RunNetOnce(predict_net)
        predict_output = schema.FetchRecord(predict_net.output_record())
        npt.assert_array_equal(float_array, predict_output['joined']())
        eval_net = layer_model_instantiator.generate_eval_net(self.model)
        workspace.RunNetOnce(eval_net)
        eval_output = schema.FetchRecord(eval_net.output_record())
        npt.assert_array_equal(np.log(float_array), eval_output['joined']())
        _, train_net = (
            layer_model_instantiator.generate_training_nets_forward_only(
                self.model))
        workspace.RunNetOnce(train_net)
        train_output = schema.FetchRecord(train_net.output_record())
        npt.assert_array_equal(np.log(float_array), train_output['joined']())
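The test above assumes a model fixture created elsewhere (e.g. in setUp). A minimal sketch of such a fixture, with assumed schema names and shapes, could look like this (the test then accesses it as self.model):

from caffe2.python import layer_model_helper, schema
import numpy as np

# Assumed fixture: a LayerModelHelper whose input schema exposes a
# 2-wide float_features scalar, matching float_array in the test above.
input_feature_schema = schema.Struct(
    ('float_features', schema.Scalar((np.float32, (2, )))),
)
model = layer_model_helper.LayerModelHelper(
    'test_model',
    input_feature_schema=input_feature_schema,
    trainer_extra_schema=schema.Struct(),
)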
Example #2
def build_input_reader(
    model,
    db_name,
    db_type,
    input_names_lst,
    batch_size=1,
    data_type='train',
):
    '''
    Initialize the dbreader and build the network for reading the data.
    The network is not connected to the computation network yet, so we can
    switch between different data sources.
    '''
    assert batch_size != 0, 'batch_size cannot be zero'
    reader_init_net = core.Net('reader_init_net_' + data_type)
    dbreader = reader_init_net.CreateDB([],
                                        'dbreader_' + data_type,
                                        db=db_name,
                                        db_type=db_type)
    # need to initialize the dbreader ONLY ONCE
    workspace.RunNetOnce(reader_init_net)
    if data_type == 'train':
        TAG = Tags.TRAIN_ONLY
    elif data_type == 'eval':
        TAG = Tags.EVAL_ONLY
    else:
        raise Exception('data type: {} not valid.'.format(data_type))
    with Tags(TAG):
        # the last one is the label
        input_data_struct = model.TensorProtosDBInput([dbreader],
                                                      input_names_lst,
                                                      name='DBInput_' +
                                                      data_type,
                                                      batch_size=batch_size)
        input_data_lst = [input_data for input_data in input_data_struct]
        for i in range(len(input_data_lst) - 1):
            input_data_lst[i] = model.StopGradient(input_data_lst[i],
                                                   input_data_lst[i])
    return input_data_lst
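A hedged usage sketch of the reader above; the DB path, blob names, and batch size are placeholders, not taken from the original code:

# Hypothetical call: read two features and a label from a minidb for training.
input_names = ['feature_a', 'feature_b', 'label']
train_inputs = build_input_reader(
    model,
    'train_data.minidb',
    'minidb',
    input_names,
    batch_size=32,
    data_type='train',
)
# By the function's convention, the last blob is the label and is the only
# one whose gradient is not stopped.
features, label = train_inputs[:-1], train_inputs[-1]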
Example #3
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag='no_tag',
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
    neg_grad_penalty=None,
):
    '''
    sig_net_dim and tanh_net_dim are the lists of dimensions of the hidden
    layers in the sig_net and tanh_net respectively.

    neg_grad_penalty['input_type']: which input type (sig/tanh) to penalize
    neg_grad_penalty['input_idx']: which input dims to apply the negative gradient penalty to
    neg_grad_penalty['magnitude']: the magnitude of the penalty
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'

    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')
        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # for the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

        # Add loss
        if train_target == TrainTarget.ADJOINT:
            model.trainer_extra_schema.sig_loss_record.prediction.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
            # assumes the inputs are 1D vectors
            sig_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.sig_loss_record)
            tanh_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.tanh_loss_record)
            adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
            model.add_loss(sig_loss)
            model.add_loss(tanh_loss)
            # Set output
            model.output_schema.sig_adjoint_pred.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.output_schema.tanh_adjoint_pred.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            loss = adjoint_loss
        elif train_target == TrainTarget.ORIGIN:
            model.trainer_extra_schema.origin_loss_record.prediction.set_value(
                origin_pred.get(), unsafe=True)
            # Add L1 Loss
            assert max_loss_scale > 1, 'max_loss_scale must be > 1'
            loss_and_metrics = model.BatchDirectWeightedL1Loss(
                model.trainer_extra_schema.origin_loss_record,
                max_scale=max_loss_scale,
            )
            # Add metric
            model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
            model.add_metric_field('scaled_l1_metric',
                                   loss_and_metrics.scaled_l1_metric)

            # Add negative gradient penalty
            ## TODO: Put them in a layer
            if neg_grad_penalty:
                # TODO: make neg_grad_penalty to a object
                with Tags(Tags.EXCLUDE_FROM_PREDICTION):
                    assert isinstance(neg_grad_penalty['input_idx'], list)
                    assert isinstance(neg_grad_penalty['magnitude'], float)
                    gather_indices = model.add_global_constant(
                        'neg_grad_penalty_input_idx',
                        neg_grad_penalty['input_idx'],
                        dtype=np.int32)
                    penalty_scaler = model.add_global_constant(
                        'penalty_scaler',
                        neg_grad_penalty['magnitude'],
                        dtype=np.float32)
                    if neg_grad_penalty['input_type'] == 'tanh':
                        gathered_adjoint_pred = model.BatchGather(
                            [tanh_adjoint_pred, gather_indices],
                            'gathered_adjoint_pred',
                            output_dtypes=(np.float32, (len(
                                neg_grad_penalty['input_idx']), )))
                        origin_input_gate = model.BatchGather(
                            [
                                model.input_feature_schema.tanh_input,
                                gather_indices
                            ],
                            'origin_input_gate',
                            output_dtypes=(np.float32, (len(
                                neg_grad_penalty['input_idx']), )))

                    elif neg_grad_penalty['input_type'] == 'sig':
                        gathered_adjoint_pred = model.BatchGather(
                            [sig_adjoint_pred, gather_indices],
                            'gathered_adjoint_pred')
                        origin_input_gate = model.BatchGather([
                            model.input_feature_schema.sig_input,
                            gather_indices
                        ], 'origin_input_gate')
                    else:
                        raise Exception(
                            'Wrong neg_grad_penalty[\'input_type\']')

                    ## TODO: Put them in a operator
                    neg_gradients = model.Relu([
                        model.Negative([
                            model.FlattenToVec([gathered_adjoint_pred],
                                               'flat_gathered_adjoint_pred')
                        ], 'neg_gathered_adjoint_pred')
                    ], 'neg_gradients')
                    input_gate = model.Relu([
                        model.Sign([
                            model.FlattenToVec([origin_input_gate],
                                               'flat_origin_input_gate')
                        ], 'sign_origin_input_gate')
                    ], 'input_gate')
                    input_gate_stopgrad = model.StopGradient(
                        [input_gate], 'input_gate_stopgrad')
                    scaled_neg_gradient_loss = model.Mul(
                        [
                            model.AveragedLoss([
                                model.Mul([neg_gradients, input_gate_stopgrad],
                                          'gated_neg_gradients')
                            ], 'avg_gated_neg_graident_loss'), penalty_scaler
                        ],
                        'scaled_neg_gradient_loss',
                        name='PenaltyScaler')
                    model.add_metric_field('neg_gradient_loss',
                                           scaled_neg_gradient_loss)
                    model.add_loss(scaled_neg_gradient_loss)

            if loss_function == 'scaled_l2':
                print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
                loss_and_metrics = model.BatchDirectWeightedL2Loss(
                    model.trainer_extra_schema.origin_loss_record,
                    max_scale=max_loss_scale,
                )
            model.add_loss(loss_and_metrics.loss)
            loss = loss_and_metrics.loss
        else:
            raise Exception('train target: ' + train_target +
                            ' not implemented')

        model.output_schema.origin_pred.set_value(origin_pred.get(),
                                                  unsafe=True)
        model.output_schema.loss.set_value(loss.get(), unsafe=True)

        return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
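A hedged call sketch for the ORIGIN training target with the negative-gradient penalty; all dimensions, tags, and magnitudes below are illustrative assumptions:

# Hypothetical call: penalize negative adjoint gradients on the first
# tanh-side input dimension while training on the origin prediction.
neg_grad_penalty = {
    'input_type': 'tanh',   # penalize the tanh-side inputs
    'input_idx': [0],       # input dimensions to penalize (must be a list)
    'magnitude': 0.1,       # penalty scale (must be a float)
}
origin_pred, sig_adj_pred, tanh_adj_pred, loss = build_adjoint_pinn(
    model,
    sig_input_dim=2,
    tanh_input_dim=2,
    sig_net_dim=[8, 1],
    tanh_net_dim=[8, 1],
    adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
    train_target=TrainTarget.ORIGIN,
    max_loss_scale=10.0,    # the ORIGIN branch asserts max_loss_scale > 1
    neg_grad_penalty=neg_grad_penalty,
)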
Example #4
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
):
    '''
    sig_net_dim and tanh_net_dim are the lists of dimensions of the hidden
    layers in the sig_net and tanh_net respectively.
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'

    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')
        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # for the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

        # Add loss
        if train_target == TrainTarget.ADJOINT:
            model.trainer_extra_schema.sig_loss_record.prediction.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
            # assumes the inputs are 1D vectors
            sig_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.sig_loss_record)
            tanh_loss = model.BatchDirectMSELoss(
                model.trainer_extra_schema.tanh_loss_record)
            adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
            model.add_loss(sig_loss)
            model.add_loss(tanh_loss)
            # Set output
            model.output_schema.sig_adjoint_pred.set_value(
                sig_adjoint_pred.get(), unsafe=True)
            model.output_schema.tanh_adjoint_pred.set_value(
                tanh_adjoint_pred.get(), unsafe=True)
            loss = adjoint_loss
        elif train_target == TrainTarget.ORIGIN:
            model.trainer_extra_schema.origin_loss_record.prediction.set_value(
                origin_pred.get(), unsafe=True)
            # Add L1 Loss
            assert max_loss_scale > 1, 'max_loss_scale must be > 1'
            loss_and_metrics = model.BatchDirectWeightedL1Loss(
                model.trainer_extra_schema.origin_loss_record,
                max_scale=max_loss_scale,
            )
            # Add metric
            model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
            model.add_metric_field('scaled_l1_metric',
                                   loss_and_metrics.scaled_l1_metric)
            if loss_function == 'scaled_l2':
                print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
                loss_and_metrics = model.BatchDirectWeightedL2Loss(
                    model.trainer_extra_schema.origin_loss_record,
                    max_scale=max_loss_scale,
                )
            model.add_loss(loss_and_metrics.loss)
            loss = loss_and_metrics.loss
        else:
            raise Exception('train target: ' + train_target +
                            ' not implemented')

        model.output_schema.origin_pred.set_value(origin_pred.get(),
                                                  unsafe=True)
        model.output_schema.loss.set_value(loss.get(), unsafe=True)

        return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
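This variant omits the negative-gradient penalty; a hedged call sketch for its default ADJOINT training target (the dimensions are illustrative assumptions):

# Hypothetical call: train directly on the adjoint (derivative) predictions.
origin_pred, sig_adj_pred, tanh_adj_pred, loss = build_adjoint_pinn(
    model,
    sig_input_dim=2,
    tanh_input_dim=2,
    sig_net_dim=[8, 1],
    tanh_net_dim=[8, 1],
    train_target=TrainTarget.ADJOINT,
)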
Example #5
def build_adjoint_mlp(
    model,
    input_dim=1,
    hidden_dims=[5, 5],
    output_dim=1,
    optim=None,
):
    ''' Precondition:
            model.input_feature_schema.origin_input has shape (input_dim, )
            model.input_feature_schema.adjoint_input has shape (output_dim, )
        Note:
            adjoint_input is a binary array, e.g. [1, 0], which is used as the
            "selector".
    '''
    assert len(hidden_dims) >= 1, "at least one hidden dim"
    with ParameterSharing({'origin': 'adjoint'}):
        z = model.input_feature_schema.origin_input
        z_lst = []
        idx = 0
        with scope.NameScope('origin'):
            for hidden_dim in hidden_dims:
                gamma = model.FC(z,
                                 hidden_dim,
                                 weight_optim=optim,
                                 bias_optim=optim,
                                 name='fc{}'.format(idx))
                z = model.Sigmoid(gamma, 'sig{}'.format(idx))
                z_lst.append(z)
                idx += 1
            # Output layer: no grad for the bias in this layer,
            # use FCWithoutBias
            origin_pred = model.FCWithoutBias(z,
                                              output_dim,
                                              weight_optim=optim,
                                              name='fc{}'.format(idx))

        with scope.NameScope('adjoint'):
            with Tags(Tags.EXCLUDE_FROM_PREDICTION):
                alpha = model.input_feature_schema.adjoint_input
                for hidden_dim in reversed(hidden_dims):
                    gamma_ad = model.FCTransposeW(alpha,
                                                  hidden_dim,
                                                  weight_optim=optim,
                                                  name='fc{}'.format(idx))
                    z = z_lst[idx - 1]
                    # Note: passing gradient is helpful
                    # z = model.StopGradient(z, z)
                    # TODO: use add_global_constant
                    one_vector = model.ConstantFill([z],
                                                    'ones{}'.format(idx),
                                                    value=1.0,
                                                    dtype=core.DataType.FLOAT)
                    multiplier = model.Mul(
                        [z, model.Sub([one_vector, z], 'sub{}'.format(idx))],
                        'multiplier{}'.format(idx),
                    )
                    alpha = model.Mul([gamma_ad, multiplier],
                                      'adjoint_layer{}'.format(idx))
                    idx -= 1
                adjoint_pred = model.FCTransposeW(alpha,
                                                  input_dim,
                                                  weight_optim=optim,
                                                  name='fc{}'.format(idx))
    # Add loss
    model.trainer_extra_schema.prediction.set_value(adjoint_pred.get(),
                                                    unsafe=True)
    loss = model.BatchDirectMSELoss(model.trainer_extra_schema)
    model.add_loss(loss)
    # Set output
    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.adjoint_pred.set_value(adjoint_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)

    return origin_pred, adjoint_pred, loss
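A hedged usage sketch; the layer sizes and optimizer settings below are illustrative assumptions rather than values from the original code:

from caffe2.python import optimizer

# Hypothetical call: a 2-in / 1-out adjoint MLP with two hidden layers,
# trained with Adagrad on all FC weights and biases.
origin_pred, adjoint_pred, loss = build_adjoint_mlp(
    model,
    input_dim=2,
    hidden_dims=[16, 16],
    output_dim=1,
    optim=optimizer.AdagradOptimizer(alpha=0.01),
)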