def get_relevance_model(self, feature_layer_keys_to_fns=None) -> RelevanceModel:
    """
    Creates RelevanceModel suited for classification use-case.

    Parameters
    ----------
    feature_layer_keys_to_fns : dict of (str, function), optional
        Mapping of function names to tensorflow compatible function
        definitions made available to the InteractionModel as feature
        transformation functions

    Returns
    -------
    `RelevanceModel`
        RelevanceModel wired together from the interaction model, loss,
        scorer, metrics and optimizer defined below

    Notes
    -----
    Override this method to create custom loss, scorer, model objects.
    """
    # Normalize here instead of using a mutable `{}` default, which would be
    # shared across all calls of this method
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=self.feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.tfrecord_type,
        file_io=self.file_io)

    # Define loss object from loss key
    loss: RelevanceLossBase = categorical_cross_entropy.get_loss(
        loss_key=self.loss_key)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.model_config_file,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
        logger=self.logger,
        file_io=self.file_io,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metric_factory.get_metric(metric_key=metric_key) for metric_key in self.metrics_keys
    ]

    # Define optimizer
    optimizer: Optimizer = get_optimizer(
        optimizer_key=self.optimizer_key,
        learning_rate=self.args.learning_rate,
        learning_rate_decay=self.args.learning_rate_decay,
        learning_rate_decay_steps=self.args.learning_rate_decay_steps,
        gradient_clip_value=self.args.gradient_clip_value,
    )

    # Combine the above to define a RelevanceModel
    # NOTE(review): model uses self.local_io while the rest of this method
    # uses self.file_io — presumably intentional, confirm against callers
    relevance_model: RelevanceModel = RelevanceModel(
        feature_config=self.feature_config,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        tfrecord_type=self.tfrecord_type,
        model_file=self.args.model_file,
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        file_io=self.local_io,
        logger=self.logger,
    )

    return relevance_model
def from_relevance_scorer(
    cls,
    interaction_model: InteractionModel,
    model_config: dict,
    feature_config: FeatureConfig,
    loss: RelevanceLossBase,
    metrics: List[Union[kmetrics.Metric, str]],
    optimizer: Optimizer,
    tfrecord_type: str,
    file_io: FileIO,
    model_file: Optional[str] = None,
    compile_keras_model: bool = False,
    output_name: str = "score",
    logger=None,
):
    """Use this as constructor to define a custom InteractionModel with RelevanceScorer"""
    # Guard against unsupported interaction model / loss implementations
    assert isinstance(interaction_model, InteractionModel)
    assert isinstance(loss, RelevanceLossBase)

    # Wrap the supplied interaction model and loss in a default scorer
    default_scorer: ScorerBase = RelevanceScorer(
        model_config=model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=output_name,
    )

    # Delegate to the primary constructor with the assembled scorer
    return cls(
        feature_config=feature_config,
        scorer=default_scorer,
        metrics=metrics,
        optimizer=optimizer,
        tfrecord_type=tfrecord_type,
        model_file=model_file,
        compile_keras_model=compile_keras_model,
        output_name=output_name,
        file_io=file_io,
        logger=logger,
    )
def get_relevance_model(self, feature_layer_keys_to_fns=None) -> RelevanceModel:
    """
    Creates a RelevanceModel that can be used for training and evaluating

    Parameters
    ----------
    feature_layer_keys_to_fns : dict of (str, function), optional
        dictionary of function names mapped to tensorflow compatible
        function definitions that can now be used in the InteractionModel
        as a feature function to transform input features

    Returns
    -------
    `RelevanceModel`
        RelevanceModel that can be used for training and evaluating
        a classification model

    Notes
    -----
    Override this method to create custom loss, scorer, model objects
    """
    # Normalize here instead of using a mutable `{}` default, which would be
    # shared across all calls of this method
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=self.feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.tfrecord_type,
        file_io=self.file_io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = categorical_cross_entropy.get_loss(loss_key=self.loss_key)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer(
        feature_config=self.feature_config,
        model_config=self.model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
        logger=self.logger,
        file_io=self.file_io,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metrics_factory.get_metric(metric_key=metric_key) for metric_key in self.metrics_keys
    ]

    # Define optimizer
    optimizer: Optimizer = get_optimizer(model_config=self.model_config)

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = ClassificationModel(
        feature_config=self.feature_config,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        tfrecord_type=self.tfrecord_type,
        model_file=self.args.model_file,
        # Layer init/freeze specs arrive as string-serialized Python literals on args
        initialize_layers_dict=ast.literal_eval(self.args.initialize_layers_dict),
        freeze_layers_list=ast.literal_eval(self.args.freeze_layers_list),
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        file_io=self.local_io,
        logger=self.logger,
    )

    return relevance_model
def test_cyclic_lr_in_training_pipeline(self):
    """Test a cyclic learning rate in model training"""
    Logger = logging_utils.setup_logging(
        reset=True,
        # Fix: join path components properly; the previous
        # `INPUT_DIR + 'ranklib'` concatenation dropped the separator
        # and wrote the log outside the intended directory
        file_name=os.path.join(INPUT_DIR, 'ranklib', "output_log.csv"),
        log_to_file=True,
    )
    io = LocalIO()
    feature_config = self.parse_config(
        TFRecordTypeKey.SEQUENCE_EXAMPLE, self.feature_config_yaml_convert_to_clicks, io)
    dataset = RelevanceDataset(
        data_dir=INPUT_DIR + '/ranklib',
        data_format=DataFormatKey.RANKLIB,
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        batch_size=2,
        file_io=io,
        preprocessing_keys_to_fns={},
        logger=Logger,
        keep_additional_info=KEEP_ADDITIONAL_INFO,
        non_zero_features_only=NON_ZERO_FEATURES_ONLY,
        max_sequence_size=319,
    )

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns={},
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        max_sequence_size=319,
        file_io=io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=LossKey.RANK_ONE_LISTNET, scoring_type=ScoringTypeKey.POINTWISE)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.model_config_file,
        interaction_model=interaction_model,
        loss=loss,
        logger=Logger,
        file_io=io,
    )

    optimizer: Optimizer = get_optimizer(
        model_config=io.read_yaml(self.model_config_file))

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        scorer=scorer,
        optimizer=optimizer,
        model_file=None,
        file_io=io,
        logger=Logger,
    )

    # Record the learning rate observed at every step via a callback
    callbacks_list = []
    my_callback_object = LrCallback()
    callbacks_list.append(my_callback_object)

    relevance_model.model.fit(
        x=dataset.train,
        validation_data=dataset.validation,
        epochs=2,
        verbose=True,
        callbacks=callbacks_list,
    )
    lr_list = my_callback_object.get_lr_list()
    # Golden learning rates for the cyclic schedule (triangular ramps with
    # decaying amplitude across cycles)
    lr_gold = [
        0.001, 0.020800006, 0.040599994, 0.0604, 0.080199994, 0.1,
        0.080199994, 0.0604, 0.040599994, 0.020800006, 0.001, 0.010900003,
        0.020800006, 0.030699994, 0.040599994, 0.050499998, 0.040599994,
        0.030699994, 0.020800006, 0.010900003, 0.001, 0.0059499955,
        0.010900003, 0.015849996, 0.020800006, 0.02575, 0.020800006,
        0.015849996, 0.010900003, 0.0059499955, 0.001, 0.0034749978,
        0.0059500015, 0.008424998, 0.010900003, 0.013375, 0.010900003,
        0.008424998, 0.0059500015, 0.0034749978, 0.001, 0.0022374988,
        0.0034749978, 0.0047125025, 0.0059500015, 0.0071875, 0.0059500015,
        0.0047125025
    ]
    # Compare each observed lr against its golden value
    for observed, expected in zip(lr_list, lr_gold):
        assert np.isclose(expected, observed)
def from_relevance_scorer(
    cls,
    feature_config: FeatureConfig,
    interaction_model: InteractionModel,
    model_config: dict,
    loss: RelevanceLossBase,
    metrics: List[Union[kmetrics.Metric, str]],
    optimizer: Optimizer,
    tfrecord_type: str,
    file_io: FileIO,
    model_file: Optional[str] = None,
    initialize_layers_dict: Optional[dict] = None,
    freeze_layers_list: Optional[list] = None,
    compile_keras_model: bool = False,
    output_name: str = "score",
    logger=None,
):
    """
    Create a RelevanceModel with default Scorer function constructed from
    an InteractionModel

    Parameters
    ----------
    feature_config : `FeatureConfig` object
        FeatureConfig object that defines the features to be loaded in the dataset
        and the preprocessing functions to be applied to each of them
    interaction_model : `InteractionModel` object
        InteractionModel object that converts input features into a dense
        feature representation
    model_config : dict
        Model configuration dictionary used to construct the default RelevanceScorer
    loss : `RelevanceLossBase` object
        Loss object defining the final activation layer and the loss function
    metrics : list
        List of keras Metric classes that will be used for evaluating the trained model
    optimizer : `Optimizer`
        Tensorflow keras optimizer to be used for training the model
    tfrecord_type : {"example", "sequence_example"}
        Type of the TFRecord protobuf message used for TFRecordDataset
    file_io : `FileIO` object
        file I/O handler objects for reading and writing data
    model_file : str, optional
        Path to pretrained model file to be loaded for evaluation or retraining
    initialize_layers_dict : dict, optional
        Dictionary of tensorflow layer names mapped to the path of pretrained weights
        Use this for transfer learning with pretrained weights
        Defaults to an empty dict when not passed
    freeze_layers_list : list, optional
        List of model layer names to be frozen
        Use this for freezing pretrained weights from other ml4ir models
        Defaults to an empty list when not passed
    compile_keras_model : bool, optional
        Whether the keras model loaded from disk should be compiled
        with loss, metrics and an optimizer
    output_name : str, optional
        Name of the output tensorflow node that captures the score
    logger : `Logger`, optional
        logging handler for status messages

    Returns
    -------
    RelevanceModel
        RelevanceModel object with a default scorer build with a custom
        InteractionModel
    """
    # Normalize None sentinels instead of using mutable `{}`/`[]` defaults,
    # which would be shared across all calls of this constructor
    if initialize_layers_dict is None:
        initialize_layers_dict = {}
    if freeze_layers_list is None:
        freeze_layers_list = []

    assert isinstance(interaction_model, InteractionModel)
    assert isinstance(loss, RelevanceLossBase)

    # Build the default scorer around the supplied interaction model and loss
    scorer: ScorerBase = RelevanceScorer(
        model_config=model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=output_name,
    )

    # Delegate to the primary constructor with the assembled scorer
    return cls(
        scorer=scorer,
        feature_config=feature_config,
        metrics=metrics,
        optimizer=optimizer,
        tfrecord_type=tfrecord_type,
        model_file=model_file,
        initialize_layers_dict=initialize_layers_dict,
        freeze_layers_list=freeze_layers_list,
        compile_keras_model=compile_keras_model,
        output_name=output_name,
        file_io=file_io,
        logger=logger,
    )
def get_ranking_model(
    self,
    loss_key: str,
    metrics_keys: List,
    feature_config: FeatureConfig,
    feature_layer_keys_to_fns=None,
) -> RelevanceModel:
    """
    Creates RankingModel

    Parameters
    ----------
    loss_key : str
        Key identifying the loss to construct via the loss factory
    metrics_keys : list
        List of metric keys resolved via the metric factory
    feature_config : `FeatureConfig` object
        FeatureConfig defining the features used by the interaction model
    feature_layer_keys_to_fns : dict of (str, function), optional
        Mapping of function names to tensorflow compatible feature
        transformation functions made available to the InteractionModel

    Returns
    -------
    `RelevanceModel`
        RankingModel wired with loss, scorer, metrics and optimizer

    Notes
    -----
    Override this method to create custom loss, scorer, model objects.
    """
    # Normalize here instead of using a mutable `{}` default, which would be
    # shared across all calls of this method
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.args.tfrecord_type,
        max_sequence_size=self.args.max_sequence_size,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=loss_key, scoring_type=self.args.scoring_type)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.args.model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metric_factory.get_metric(metric_key=metric_key) for metric_key in metrics_keys
    ]

    # Define optimizer
    optimizer: Optimizer = get_optimizer(
        optimizer_key=self.args.optimizer_key,
        learning_rate=self.args.learning_rate,
        learning_rate_decay=self.args.learning_rate_decay,
        learning_rate_decay_steps=self.args.learning_rate_decay_steps,
        gradient_clip_value=self.args.gradient_clip_value,
    )

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=feature_config,
        tfrecord_type=self.args.tfrecord_type,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        model_file=self.args.model_file,
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        logger=self.logger,
    )

    return relevance_model
def test_reduce_lr_on_plateau_in_training_pipeline(self):
    """Test reduce lr on plateau"""
    self.model_config_file = MODEL_CONFIG_REDUCE_LR_ON_PLATEAU
    Logger = logging_utils.setup_logging(
        reset=True,
        # Fix: join path components properly; the previous
        # `INPUT_DIR + 'ranklib'` concatenation dropped the separator
        # and wrote the log outside the intended directory
        file_name=os.path.join(INPUT_DIR, 'ranklib', "output_log.csv"),
        log_to_file=True,
    )
    io = LocalIO()
    feature_config = self.parse_config(TFRecordTypeKey.SEQUENCE_EXAMPLE,
                                       self.feature_config_yaml_convert_to_clicks, io)
    model_config = io.read_yaml(self.model_config_file)
    dataset = RelevanceDataset(
        data_dir=INPUT_DIR + '/ranklib',
        data_format=DataFormatKey.RANKLIB,
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        batch_size=32,
        file_io=io,
        preprocessing_keys_to_fns={},
        logger=Logger,
        keep_additional_info=KEEP_ADDITIONAL_INFO,
        non_zero_features_only=NON_ZERO_FEATURES_ONLY,
        max_sequence_size=319,
    )

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns={},
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        max_sequence_size=319,
        file_io=io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=LossKey.RANK_ONE_LISTNET, scoring_type=ScoringTypeKey.POINTWISE
    )

    # Define scorer
    scorer: ScorerBase = RelevanceScorer.from_model_config_file(
        model_config_file=self.model_config_file,
        interaction_model=interaction_model,
        loss=loss,
        logger=Logger,
        file_io=io,
    )

    optimizer: Optimizer = get_optimizer(model_config=model_config)

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModel(
        feature_config=feature_config,
        tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
        scorer=scorer,
        optimizer=optimizer,
        model_file=None,
        file_io=io,
        logger=Logger,
    )

    # Scheduler callback applies the reduce-on-plateau policy; LrCallback
    # records the learning rate after each epoch
    callback_list = []
    callback_list.append(relevance_model.define_scheduler_as_callback(None, model_config))
    my_callback_object = LrCallback()
    callback_list.append(my_callback_object)

    relevance_model.model.fit(
        x=dataset.train.shard(2, 0),
        validation_data=dataset.validation.shard(2, 1),
        epochs=10,
        verbose=True,
        callbacks=callback_list,
    )
    lr_list = my_callback_object.get_lr_reduce_on_plateau_list()
    # Golden learning rates: halved on each plateau, floored at 1.0
    lr_gold = [50.0, 50.0, 25.0, 12.5, 6.25, 3.125, 1.5625, 1.0, 1.0, 1.0]
    assert np.all(np.isclose(lr_gold, lr_list))
def get_ranking_model(
    self,
    loss_key: str,
    metrics_keys: List,
    feature_config: FeatureConfig,
    model_config: Optional[dict] = None,
    feature_layer_keys_to_fns=None,
    initialize_layers_dict=None,
    freeze_layers_list=None,
) -> RelevanceModel:
    """
    Creates RankingModel

    Parameters
    ----------
    loss_key : str
        Key identifying the loss to construct via the loss factory
    metrics_keys : list
        List of metric keys resolved via the metric factory
    feature_config : `FeatureConfig` object
        FeatureConfig defining the features used by the interaction model
    model_config : dict, optional
        NOTE(review): this parameter is not read in this method body;
        self.model_config and self.args.model_config are used instead —
        confirm whether it should be wired through
    feature_layer_keys_to_fns : dict of (str, function), optional
        Mapping of function names to tensorflow compatible feature
        transformation functions made available to the InteractionModel
    initialize_layers_dict : dict, optional
        Dictionary of tensorflow layer names mapped to paths of pretrained weights
    freeze_layers_list : list, optional
        List of model layer names to be frozen

    Returns
    -------
    `RelevanceModel`
        RankingModel (linear or default, depending on the architecture key)

    Notes
    -----
    Override this method to create custom loss, scorer, model objects.
    """
    # Normalize None sentinels instead of using mutable `{}`/`[]` defaults,
    # which would be shared across all calls of this method
    if model_config is None:
        model_config = {}
    if feature_layer_keys_to_fns is None:
        feature_layer_keys_to_fns = {}
    if initialize_layers_dict is None:
        initialize_layers_dict = {}
    if freeze_layers_list is None:
        freeze_layers_list = []

    # Define interaction model
    interaction_model: InteractionModel = UnivariateInteractionModel(
        feature_config=feature_config,
        feature_layer_keys_to_fns=feature_layer_keys_to_fns,
        tfrecord_type=self.args.tfrecord_type,
        max_sequence_size=self.args.max_sequence_size,
        file_io=self.file_io,
    )

    # Define loss object from loss key
    loss: RelevanceLossBase = loss_factory.get_loss(
        loss_key=loss_key, scoring_type=self.args.scoring_type)

    # Define scorer
    scorer: ScorerBase = RelevanceScorer(
        feature_config=feature_config,
        model_config=self.model_config,
        interaction_model=interaction_model,
        loss=loss,
        output_name=self.args.output_name,
        logger=self.logger,
        file_io=self.file_io,
    )

    # Define metrics objects from metrics keys
    metrics: List[Union[Type[Metric], str]] = [
        metric_factory.get_metric(metric_key=metric_key) for metric_key in metrics_keys
    ]

    # Define optimizer
    optimizer: Optimizer = get_optimizer(
        model_config=self.file_io.read_yaml(self.args.model_config),
    )

    # Pick the concrete ranking model class based on the configured architecture
    if self.model_config["architecture_key"] == ArchitectureKey.LINEAR:
        RankingModelClass = LinearRankingModel
    else:
        RankingModelClass = RankingModel

    # Combine the above to define a RelevanceModel
    relevance_model: RelevanceModel = RankingModelClass(
        feature_config=feature_config,
        tfrecord_type=self.args.tfrecord_type,
        scorer=scorer,
        metrics=metrics,
        optimizer=optimizer,
        model_file=self.args.model_file,
        initialize_layers_dict=initialize_layers_dict,
        freeze_layers_list=freeze_layers_list,
        compile_keras_model=self.args.compile_keras_model,
        output_name=self.args.output_name,
        logger=self.logger,
        file_io=self.file_io,
    )

    return relevance_model