def testSelectRecordByContext(self):
    float_features = self.model.input_feature_schema.float_features

    float_array = np.array([1.0, 2.0], dtype=np.float32)
    schema.FeedRecord(float_features, [float_array])

    with Tags(Tags.EXCLUDE_FROM_PREDICTION):
        log_float_features = self.model.Log(float_features, 1)
    joined = self.model.SelectRecordByContext(
        schema.Struct(
            (InstantiationContext.PREDICTION, float_features),
            (InstantiationContext.TRAINING, log_float_features),
            # TODO: TRAIN_ONLY layers are also generated in eval
            (InstantiationContext.EVAL, log_float_features),
        ))

    # model.output_schema has to be a struct
    self.model.output_schema = schema.Struct(('joined', joined))

    predict_net = layer_model_instantiator.generate_predict_net(self.model)
    workspace.RunNetOnce(predict_net)
    predict_output = schema.FetchRecord(predict_net.output_record())
    npt.assert_array_equal(float_array, predict_output['joined']())

    eval_net = layer_model_instantiator.generate_eval_net(self.model)
    workspace.RunNetOnce(eval_net)
    eval_output = schema.FetchRecord(eval_net.output_record())
    npt.assert_array_equal(np.log(float_array), eval_output['joined']())

    _, train_net = (
        layer_model_instantiator.generate_training_nets_forward_only(
            self.model))
    workspace.RunNetOnce(train_net)
    train_output = schema.FetchRecord(train_net.output_record())
    npt.assert_array_equal(np.log(float_array), train_output['joined']())
def build_input_reader(
    model,
    db_name,
    db_type,
    input_names_lst,
    batch_size=1,
    data_type='train',
):
    '''
    Initialize the dbreader and build the network for reading the data;
    however, the network is not connected to the computation network yet,
    so we can switch between different data sources.
    '''
    assert batch_size != 0, 'batch_size cannot be zero'
    reader_init_net = core.Net('reader_init_net_' + data_type)
    dbreader = reader_init_net.CreateDB(
        [], 'dbreader_' + data_type, db=db_name, db_type=db_type)
    # need to initialize dbreader ONLY ONCE
    workspace.RunNetOnce(reader_init_net)
    if data_type == 'train':
        TAG = Tags.TRAIN_ONLY
    elif data_type == 'eval':
        TAG = Tags.EVAL_ONLY
    else:
        raise Exception('data type: {} not valid.'.format(data_type))
    with Tags(TAG):
        # the last one is the label
        input_data_struct = model.TensorProtosDBInput(
            [dbreader],
            input_names_lst,
            name='DBInput_' + data_type,
            batch_size=batch_size)
        input_data_lst = [input_data for input_data in input_data_struct]
        for i in range(len(input_data_lst) - 1):
            input_data_lst[i] = model.StopGradient(
                input_data_lst[i], input_data_lst[i])
    return input_data_lst
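# Usage sketch for build_input_reader (illustrative only, not part of the
# library). The db file name, db type, and blob names below are assumptions,
# and `model` is expected to be a layer_model_helper-style model as used
# elsewhere in this file.
def _example_build_input_reader(model):
    # Read two blobs per record; by convention the last name is the label,
    # so only the feature blob gets a StopGradient inside the helper.
    input_feature, label = build_input_reader(
        model,
        db_name='train_data.db',        # hypothetical database file
        db_type='minidb',               # any db type Caffe2 supports
        input_names_lst=['input_feature', 'label'],
        batch_size=100,
        data_type='train',
    )
    # The returned blobs can then be wired into the model's input record
    # before the training net is instantiated.
    return input_feature, label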
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag='no_tag',
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
    neg_grad_penalty=None,
):
    '''
    sig_net_dim and tanh_net_dim are the lists of dimensions of the hidden
    layers in the sig_net and tanh_net respectively.

    neg_grad_penalty['input_type']: which input type (sig/tanh)
    neg_grad_penalty['input_idx']: which input dims to apply the negative
        gradient penalty to
    neg_grad_penalty['magnitude']: the magnitude of the penalty
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'
    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')
        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # for the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

    # Add loss
    if train_target == TrainTarget.ADJOINT:
        model.trainer_extra_schema.sig_loss_record.prediction.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
        # assumes the inputs are 1D vectors
        sig_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.sig_loss_record)
        tanh_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.tanh_loss_record)
        adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
        model.add_loss(sig_loss)
        model.add_loss(tanh_loss)
        # Set output
        model.output_schema.sig_adjoint_pred.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.output_schema.tanh_adjoint_pred.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        loss = adjoint_loss
    elif train_target == TrainTarget.ORIGIN:
        model.trainer_extra_schema.origin_loss_record.prediction.set_value(
            origin_pred.get(), unsafe=True)
        # Add L1 Loss
        assert max_loss_scale > 1, 'max loss scale must > 1'
        loss_and_metrics = model.BatchDirectWeightedL1Loss(
            model.trainer_extra_schema.origin_loss_record,
            max_scale=max_loss_scale,
        )
        # Add metric
        model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
        model.add_metric_field('scaled_l1_metric',
                               loss_and_metrics.scaled_l1_metric)

        # Add negative gradient penalty
        ## TODO: Put them in a layer
        if neg_grad_penalty:
            # TODO: make neg_grad_penalty an object
            with Tags(Tags.EXCLUDE_FROM_PREDICTION):
                assert isinstance(neg_grad_penalty['input_idx'], list)
                assert isinstance(neg_grad_penalty['magnitude'], float)
                gather_indices = model.add_global_constant(
                    'neg_grad_penalty_input_idx',
                    neg_grad_penalty['input_idx'],
                    dtype=np.int32)
                penalty_scaler = model.add_global_constant(
                    'penalty_scaler',
                    neg_grad_penalty['magnitude'],
                    dtype=np.float32)
                if neg_grad_penalty['input_type'] == 'tanh':
                    gathered_adjoint_pred = model.BatchGather(
                        [tanh_adjoint_pred, gather_indices],
                        'gathered_adjoint_pred',
                        output_dtypes=(
                            np.float32,
                            (len(neg_grad_penalty['input_idx']),)))
                    origin_input_gate = model.BatchGather(
                        [model.input_feature_schema.tanh_input,
                         gather_indices],
                        'origin_input_gate',
                        output_dtypes=(
                            np.float32,
                            (len(neg_grad_penalty['input_idx']),)))
                elif neg_grad_penalty['input_type'] == 'sig':
                    gathered_adjoint_pred = model.BatchGather(
                        [sig_adjoint_pred, gather_indices],
                        'gathered_adjoint_pred')
                    origin_input_gate = model.BatchGather(
                        [model.input_feature_schema.sig_input,
                         gather_indices],
                        'origin_input_gate')
                else:
                    raise Exception(
                        'Wrong neg_grad_penalty[\'input_type\']')
                ## TODO: Put them in an operator
                neg_gradients = model.Relu([
                    model.Negative([
                        model.FlattenToVec(
                            [gathered_adjoint_pred],
                            'flat_gathered_adjoint_pred')
                    ], 'neg_gathered_adjoint_pred')
                ], 'neg_gradients')
                input_gate = model.Relu([
                    model.Sign([
                        model.FlattenToVec(
                            [origin_input_gate],
                            'flat_origin_input_gate')
                    ], 'sign_origin_input_gate')
                ], 'input_gate')
                input_gate_stopgrad = model.StopGradient(
                    [input_gate], 'input_gate_stopgrad')
                scaled_neg_gradient_loss = model.Mul(
                    [
                        model.AveragedLoss([
                            model.Mul([neg_gradients, input_gate_stopgrad],
                                      'gated_neg_gradients')
                        ], 'avg_gated_neg_graident_loss'),
                        penalty_scaler
                    ],
                    'scaled_neg_gradient_loss',
                    name='PenaltyScaler')
                model.add_metric_field('neg_gradient_loss',
                                       scaled_neg_gradient_loss)
                model.add_loss(scaled_neg_gradient_loss)

        if loss_function == 'scaled_l2':
            print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
            loss_and_metrics = model.BatchDirectWeightedL2Loss(
                model.trainer_extra_schema.origin_loss_record,
                max_scale=max_loss_scale,
            )
        model.add_loss(loss_and_metrics.loss)
        loss = loss_and_metrics.loss
    else:
        raise Exception('train target: ' + train_target + ' not implemented')

    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)

    return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
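# Usage sketch for build_adjoint_pinn (illustrative only). The dimensions,
# the optimizer choice (assumes `from caffe2.python import optimizer`), and
# the neg_grad_penalty dict below are assumptions; `model` must expose the
# sig_input/tanh_input/adjoint_input feature schema used above.
def _example_build_adjoint_pinn_origin_target(model):
    origin_pred, sig_adj_pred, tanh_adj_pred, loss = build_adjoint_pinn(
        model,
        sig_input_dim=4,
        tanh_input_dim=2,
        sig_net_dim=[16, 16, 8],
        tanh_net_dim=[16, 16, 8],
        weight_optim=optimizer.AdagradOptimizer(alpha=0.01),
        bias_optim=optimizer.AdagradOptimizer(alpha=0.01),
        adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
        train_target=TrainTarget.ORIGIN,
        loss_function='scaled_l1',
        max_loss_scale=10.0,
        # Penalize negative gradients (i.e. positive adjoint outputs after
        # negation) on the first tanh input dimension.
        neg_grad_penalty={
            'input_type': 'tanh',
            'input_idx': [0],
            'magnitude': 0.1,
        },
    )
    return origin_pred, loss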
def build_adjoint_pinn(
    model,
    sig_input_dim=1,
    tanh_input_dim=1,
    sig_net_dim=[1],
    tanh_net_dim=[1],
    weight_optim=None,
    bias_optim=None,
    adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
    train_target=TrainTarget.ADJOINT,
    loss_function='scaled_l1',
    max_loss_scale=1.0,
):
    '''
    sig_net_dim and tanh_net_dim are the lists of dimensions of the hidden
    layers in the sig_net and tanh_net respectively.
    '''
    assert len(sig_net_dim) * len(tanh_net_dim) > 0, 'arch cannot be empty'
    assert len(sig_net_dim) == len(tanh_net_dim), 'arch mismatch'
    assert sig_net_dim[-1] == tanh_net_dim[-1], 'last dim mismatch'
    with ParameterSharing({'origin': 'adjoint'}):
        sig_h_lst = []
        tanh_h_lst = []
        block_index = 0
        with scope.NameScope('origin'):
            sig_h = model.input_feature_schema.sig_input
            tanh_h = model.input_feature_schema.tanh_input
            for sig_n, tanh_n in zip(sig_net_dim, tanh_net_dim):
                sig_h, tanh_h = build_origin_block(
                    model,
                    sig_h,
                    tanh_h,
                    sig_n,
                    tanh_n,
                    block_index,
                    weight_optim=weight_optim,
                    bias_optim=bias_optim,
                )
                sig_h_lst.append(sig_h)
                tanh_h_lst.append(tanh_h)
                block_index += 1
            origin_pred = model.Mul([sig_h, tanh_h], 'origin_pred')
        with scope.NameScope('adjoint'):
            # adjoint_tag decides how we are going to use the adjoint net.
            with Tags(adjoint_tag):
                ad_input = model.input_feature_schema.adjoint_input
                sig_h = sig_h_lst[block_index - 1]
                tanh_h = tanh_h_lst[block_index - 1]
                # for the output, sig_h and tanh_h have the same dimension.
                output_ones = model.ConstantFill(
                    [sig_h],
                    'output_ones_{}'.format(block_index),
                    value=1.0,
                    dtype=core.DataType.FLOAT)
                beta = model.Mul([
                    tanh_h,
                    model.Mul([
                        sig_h,
                        model.Sub([output_ones, sig_h],
                                  'sig_output_sub_{}'.format(block_index))
                    ], 'sig_output_mul_{}'.format(block_index))
                ], 'sig_output_beta_{}'.format(block_index))
                alpha = model.Mul([
                    sig_h,
                    model.Sub([
                        output_ones,
                        model.Mul([tanh_h, tanh_h],
                                  'tanh_output_sq_{}'.format(block_index))
                    ], 'tanh_output_sub_{}'.format(block_index))
                ], 'tanh_output_mul_{}'.format(block_index))
                inter = model.FCTransposeW(
                    beta,
                    tanh_net_dim[-1],
                    weight_optim=weight_optim,
                    name='inter_embed_layer_{}'.format(block_index - 1))
                alpha = model.Add([alpha, inter],
                                  'tanh_output_alpha_{}'.format(block_index))
                for sig_n, tanh_n in zip(reversed(sig_net_dim[:-1]),
                                         reversed(tanh_net_dim[:-1])):
                    block_index -= 1
                    sig_h = sig_h_lst[block_index - 1]
                    tanh_h = tanh_h_lst[block_index - 1]
                    beta, alpha = build_adjoint_block(
                        model,
                        beta,
                        alpha,
                        sig_h,
                        tanh_h,
                        sig_n,
                        tanh_n,
                        block_index,
                        weight_optim=weight_optim,
                    )
                sig_adjoint_pred = model.FCTransposeW(
                    beta,
                    sig_input_dim,
                    weight_optim=weight_optim,
                    name='sig_fc_layer_{}'.format(block_index - 1))
                tanh_adjoint_pred = model.FCTransposeW(
                    alpha,
                    tanh_input_dim,
                    weight_optim=weight_optim,
                    name='tanh_fc_layer_{}'.format(block_index - 1))

    # Add loss
    if train_target == TrainTarget.ADJOINT:
        model.trainer_extra_schema.sig_loss_record.prediction.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.trainer_extra_schema.tanh_loss_record.prediction.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        # CAUTION: BatchDirectMSELoss calls the SquaredL2Distance op, which
        # assumes the inputs are 1D vectors
        sig_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.sig_loss_record)
        tanh_loss = model.BatchDirectMSELoss(
            model.trainer_extra_schema.tanh_loss_record)
        adjoint_loss = model.Add([sig_loss, tanh_loss], 'adjoint_loss')
        model.add_loss(sig_loss)
        model.add_loss(tanh_loss)
        # Set output
        model.output_schema.sig_adjoint_pred.set_value(
            sig_adjoint_pred.get(), unsafe=True)
        model.output_schema.tanh_adjoint_pred.set_value(
            tanh_adjoint_pred.get(), unsafe=True)
        loss = adjoint_loss
    elif train_target == TrainTarget.ORIGIN:
        model.trainer_extra_schema.origin_loss_record.prediction.set_value(
            origin_pred.get(), unsafe=True)
        # Add L1 Loss
        assert max_loss_scale > 1, 'max loss scale must > 1'
        loss_and_metrics = model.BatchDirectWeightedL1Loss(
            model.trainer_extra_schema.origin_loss_record,
            max_scale=max_loss_scale,
        )
        # Add metric
        model.add_metric_field('l1_metric', loss_and_metrics.l1_metric)
        model.add_metric_field('scaled_l1_metric',
                               loss_and_metrics.scaled_l1_metric)
        if loss_function == 'scaled_l2':
            print('[Pi-NN Build Net]: Use scaled_l2 loss, but l1 metrics.')
            loss_and_metrics = model.BatchDirectWeightedL2Loss(
                model.trainer_extra_schema,
                max_scale=max_loss_scale,
            )
        model.add_loss(loss_and_metrics.loss)
        loss = loss_and_metrics.loss
    else:
        raise Exception('train target: ' + train_target + ' not implemented')

    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)

    return origin_pred, sig_adjoint_pred, tanh_adjoint_pred, loss
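# Usage sketch for this variant (illustrative only): it trains against the
# adjoint (derivative) targets rather than the origin output. The dimensions
# below are assumptions, and `model` must provide the sig/tanh/adjoint input
# schemas used above.
def _example_build_adjoint_pinn_adjoint_target(model):
    origin_pred, sig_adj_pred, tanh_adj_pred, loss = build_adjoint_pinn(
        model,
        sig_input_dim=4,
        tanh_input_dim=2,
        sig_net_dim=[8, 8],
        tanh_net_dim=[8, 8],
        adjoint_tag=Tags.EXCLUDE_FROM_PREDICTION,
        train_target=TrainTarget.ADJOINT,
    )
    # With TrainTarget.ADJOINT the returned loss is the sum of the MSE losses
    # on sig_adjoint_pred and tanh_adjoint_pred.
    return sig_adj_pred, tanh_adj_pred, loss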
def build_adjoint_mlp(
    model,
    input_dim=1,
    hidden_dims=[5, 5],
    output_dim=1,
    optim=None,
):
    '''
    Precondition:
        model.input_feature_schema.origin_input has shape of (input_dim, )
        model.input_feature_schema.adjoint_input has shape of (output_dim, )
    Note:
        adjoint_input is a binary array, e.g. [1, 0], which is used as the
        "selector".
    '''
    assert len(hidden_dims) >= 1, "at least one hidden dim"
    with ParameterSharing({'origin': 'adjoint'}):
        z = model.input_feature_schema.origin_input
        z_lst = []
        idx = 0
        with scope.NameScope('origin'):
            for hidden_dim in hidden_dims:
                gamma = model.FC(
                    z,
                    hidden_dim,
                    weight_optim=optim,
                    bias_optim=optim,
                    name='fc{}'.format(idx))
                z = model.Sigmoid(gamma, 'sig{}'.format(idx))
                z_lst.append(z)
                idx += 1
            # Output layer: no grad for the bias in this layer,
            # use FCWithoutBias
            origin_pred = model.FCWithoutBias(
                z, output_dim, weight_optim=optim, name='fc{}'.format(idx))

        with scope.NameScope('adjoint'):
            with Tags(Tags.EXCLUDE_FROM_PREDICTION):
                alpha = model.input_feature_schema.adjoint_input
                for hidden_dim in reversed(hidden_dims):
                    gamma_ad = model.FCTransposeW(
                        alpha,
                        hidden_dim,
                        weight_optim=optim,
                        name='fc{}'.format(idx))
                    z = z_lst[idx - 1]
                    # Note: passing gradient is helpful
                    # z = model.StopGradient(z, z)
                    # TODO: use add_global_constant
                    one_vector = model.ConstantFill(
                        [z],
                        'ones{}'.format(idx),
                        value=1.0,
                        dtype=core.DataType.FLOAT)
                    multiplier = model.Mul(
                        [z, model.Sub([one_vector, z], 'sub{}'.format(idx))],
                        'multiplier{}'.format(idx),
                    )
                    alpha = model.Mul([gamma_ad, multiplier],
                                      'adjoint_layer{}'.format(idx))
                    idx -= 1
                adjoint_pred = model.FCTransposeW(
                    alpha,
                    input_dim,
                    weight_optim=optim,
                    name='fc{}'.format(idx))

    # Add loss
    model.trainer_extra_schema.prediction.set_value(
        adjoint_pred.get(), unsafe=True)
    loss = model.BatchDirectMSELoss(model.trainer_extra_schema)
    model.add_loss(loss)
    # Set output
    model.output_schema.origin_pred.set_value(origin_pred.get(), unsafe=True)
    model.output_schema.adjoint_pred.set_value(adjoint_pred.get(), unsafe=True)
    model.output_schema.loss.set_value(loss.get(), unsafe=True)

    return origin_pred, adjoint_pred, loss
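# Usage sketch for build_adjoint_mlp (illustrative only): dimensions and the
# optimizer choice are assumptions. Feeding a one-hot adjoint_input selects
# which output component the adjoint net differentiates.
def _example_build_adjoint_mlp(model):
    origin_pred, adjoint_pred, loss = build_adjoint_mlp(
        model,
        input_dim=2,
        hidden_dims=[16, 16],
        output_dim=1,
        optim=None,  # or e.g. an optimizer from caffe2.python.optimizer
    )
    # origin_pred is the forward MLP output; adjoint_pred approximates the
    # gradient of the selected output with respect to origin_input, built by
    # back-propagating through the shared weights with FCTransposeW.
    return origin_pred, adjoint_pred, loss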