예제 #1
0
파일: pipeline.py 프로젝트: fagan2888/ml4ir
    def get_relevance_model(self,
                            feature_layer_keys_to_fns=None) -> RelevanceModel:
        """
        Creates RelevanceModel suited for classification use-case.

        Parameters
        ----------
        feature_layer_keys_to_fns : dict, optional
            Mapping forwarded to the UnivariateInteractionModel as
            ``feature_layer_keys_to_fns``. When omitted, a new empty dict
            is created for this call.

        Returns
        -------
        RelevanceModel
            Model assembled from the interaction model, loss, scorer,
            metrics and optimizer that are built from this object's
            configuration and command line args.

        NOTE: Override this method to create custom loss, scorer, model objects.
        """
        # Build a fresh dict per call. A `{}` default in the signature is
        # evaluated once and would be shared by every call (and by every
        # object that keeps a reference to it).
        if feature_layer_keys_to_fns is None:
            feature_layer_keys_to_fns = {}

        # Define interaction model
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=self.feature_config,
            feature_layer_keys_to_fns=feature_layer_keys_to_fns,
            tfrecord_type=self.tfrecord_type,
            file_io=self.file_io)

        # Define loss object from loss key
        loss: RelevanceLossBase = categorical_cross_entropy.get_loss(
            loss_key=self.loss_key)

        # Define scorer
        scorer: ScorerBase = RelevanceScorer.from_model_config_file(
            model_config_file=self.model_config_file,
            interaction_model=interaction_model,
            loss=loss,
            output_name=self.args.output_name,
            logger=self.logger,
            file_io=self.file_io,
        )

        # Define metrics objects from metrics keys
        metrics: List[Union[Type[Metric], str]] = [
            metric_factory.get_metric(metric_key=metric_key)
            for metric_key in self.metrics_keys
        ]

        # Define optimizer
        optimizer: Optimizer = get_optimizer(
            optimizer_key=self.optimizer_key,
            learning_rate=self.args.learning_rate,
            learning_rate_decay=self.args.learning_rate_decay,
            learning_rate_decay_steps=self.args.learning_rate_decay_steps,
            gradient_clip_value=self.args.gradient_clip_value,
        )

        # Combine the above to define a RelevanceModel
        # NOTE(review): the model receives self.local_io while the
        # interaction model and scorer receive self.file_io - confirm
        # this split is intentional.
        relevance_model: RelevanceModel = RelevanceModel(
            feature_config=self.feature_config,
            scorer=scorer,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=self.tfrecord_type,
            model_file=self.args.model_file,
            compile_keras_model=self.args.compile_keras_model,
            output_name=self.args.output_name,
            file_io=self.local_io,
            logger=self.logger,
        )
        return relevance_model
예제 #2
0
    def from_relevance_scorer(
        cls,
        interaction_model: InteractionModel,
        model_config: dict,
        feature_config: FeatureConfig,
        loss: RelevanceLossBase,
        metrics: List[Union[kmetrics.Metric, str]],
        optimizer: Optimizer,
        tfrecord_type: str,
        file_io: FileIO,
        model_file: Optional[str] = None,
        compile_keras_model: bool = False,
        output_name: str = "score",
        logger=None,
    ):
        """
        Alternate constructor that wraps a custom InteractionModel in a
        RelevanceScorer and builds an instance of ``cls`` around it.

        The scorer is created from ``model_config``, ``interaction_model``,
        ``loss`` and ``output_name``; all remaining arguments are passed
        through unchanged to ``cls(...)``.

        Raises
        ------
        AssertionError
            If ``interaction_model`` is not an InteractionModel or ``loss``
            is not a RelevanceLossBase.
        """
        # Type checks run in the same order as the arguments are consumed
        expected_types = (
            (interaction_model, InteractionModel),
            (loss, RelevanceLossBase),
        )
        for candidate, required_type in expected_types:
            assert isinstance(candidate, required_type)

        # Scorer built from the supplied interaction model and loss
        scorer_kwargs = {
            "model_config": model_config,
            "interaction_model": interaction_model,
            "loss": loss,
            "output_name": output_name,
        }
        default_scorer: ScorerBase = RelevanceScorer(**scorer_kwargs)

        # Everything else is forwarded as-is to the class constructor
        constructor_kwargs = {
            "scorer": default_scorer,
            "feature_config": feature_config,
            "metrics": metrics,
            "optimizer": optimizer,
            "tfrecord_type": tfrecord_type,
            "model_file": model_file,
            "compile_keras_model": compile_keras_model,
            "output_name": output_name,
            "file_io": file_io,
            "logger": logger,
        }
        return cls(**constructor_kwargs)
예제 #3
0
    def get_relevance_model(self, feature_layer_keys_to_fns=None) -> RelevanceModel:
        """
        Creates a RelevanceModel that can be used for training and evaluating

        Parameters
        ----------
        feature_layer_keys_to_fns : dict of (str, function), optional
            dictionary of function names mapped to tensorflow compatible
            function definitions that can now be used in the InteractionModel
            as a feature function to transform input features.
            When omitted, a new empty dict is created for this call

        Returns
        -------
        `RelevanceModel`
            RelevanceModel that can be used for training and evaluating
            a classification model

        Notes
        -----
        Override this method to create custom loss, scorer, model objects
        """
        # Build a fresh dict per call. A `{}` default in the signature is
        # evaluated once and would be shared across all calls.
        if feature_layer_keys_to_fns is None:
            feature_layer_keys_to_fns = {}

        # Define interaction model
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=self.feature_config,
            feature_layer_keys_to_fns=feature_layer_keys_to_fns,
            tfrecord_type=self.tfrecord_type,
            file_io=self.file_io,
        )

        # Define loss object from loss key
        loss: RelevanceLossBase = categorical_cross_entropy.get_loss(loss_key=self.loss_key)

        # Define scorer
        scorer: ScorerBase = RelevanceScorer(
            feature_config=self.feature_config,
            model_config=self.model_config,
            interaction_model=interaction_model,
            loss=loss,
            output_name=self.args.output_name,
            logger=self.logger,
            file_io=self.file_io,
        )

        # Define metrics objects from metrics keys
        metrics: List[Union[Type[Metric], str]] = [
            metrics_factory.get_metric(metric_key=metric_key) for metric_key in self.metrics_keys
        ]

        # Define optimizer
        optimizer: Optimizer = get_optimizer(model_config=self.model_config)

        # Combine the above to define a RelevanceModel
        # The layer initialization/freeze args are parsed with
        # ast.literal_eval before being handed to the model.
        # NOTE(review): the model receives self.local_io while the
        # interaction model and scorer receive self.file_io - confirm
        # this split is intentional.
        relevance_model: RelevanceModel = ClassificationModel(
            feature_config=self.feature_config,
            scorer=scorer,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=self.tfrecord_type,
            model_file=self.args.model_file,
            initialize_layers_dict=ast.literal_eval(self.args.initialize_layers_dict),
            freeze_layers_list=ast.literal_eval(self.args.freeze_layers_list),
            compile_keras_model=self.args.compile_keras_model,
            output_name=self.args.output_name,
            file_io=self.local_io,
            logger=self.logger,
        )
        return relevance_model
예제 #4
0
    def test_cyclic_lr_in_training_pipeline(self):
        """
        Test a cyclic learning rate in model training

        Trains a RankingModel for two epochs on the ranklib dataset with an
        LrCallback attached, then compares every learning rate the callback
        returns from ``get_lr_list()`` against the expected values.
        """
        # Join the path components explicitly so the log file lands in the
        # same ranklib directory that is used for `data_dir` below,
        # whether or not INPUT_DIR ends with a path separator.
        logger = logging_utils.setup_logging(
            reset=True,
            file_name=os.path.join(INPUT_DIR, "ranklib", "output_log.csv"),
            log_to_file=True,
        )

        io = LocalIO()
        feature_config = self.parse_config(
            TFRecordTypeKey.SEQUENCE_EXAMPLE,
            self.feature_config_yaml_convert_to_clicks, io)

        dataset = RelevanceDataset(
            data_dir=INPUT_DIR + '/ranklib',
            data_format=DataFormatKey.RANKLIB,
            feature_config=feature_config,
            tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
            batch_size=2,
            file_io=io,
            preprocessing_keys_to_fns={},
            logger=logger,
            keep_additional_info=KEEP_ADDITIONAL_INFO,
            non_zero_features_only=NON_ZERO_FEATURES_ONLY,
            max_sequence_size=319,
        )

        # Define interaction model
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=feature_config,
            feature_layer_keys_to_fns={},
            tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
            max_sequence_size=319,
            file_io=io,
        )

        # Define loss object from loss key
        loss: RelevanceLossBase = loss_factory.get_loss(
            loss_key=LossKey.RANK_ONE_LISTNET,
            scoring_type=ScoringTypeKey.POINTWISE)

        # Define scorer
        scorer: ScorerBase = RelevanceScorer.from_model_config_file(
            model_config_file=self.model_config_file,
            interaction_model=interaction_model,
            loss=loss,
            logger=logger,
            file_io=io,
        )

        optimizer: Optimizer = get_optimizer(
            model_config=io.read_yaml(self.model_config_file))

        # Combine the above to define a RelevanceModel
        relevance_model: RelevanceModel = RankingModel(
            feature_config=feature_config,
            tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
            scorer=scorer,
            optimizer=optimizer,
            model_file=None,
            file_io=io,
            logger=logger,
        )

        # The callback collects the learning rates that are checked below
        my_callback_object = LrCallback()
        callbacks_list = [my_callback_object]

        # The fit() return value is not needed; only the callback is inspected
        relevance_model.model.fit(
            x=dataset.train,
            validation_data=dataset.validation,
            epochs=2,
            verbose=True,
            callbacks=callbacks_list,
        )
        lr_list = my_callback_object.get_lr_list()
        lr_gold = [
            0.001, 0.020800006, 0.040599994, 0.0604, 0.080199994, 0.1,
            0.080199994, 0.0604, 0.040599994, 0.020800006, 0.001, 0.010900003,
            0.020800006, 0.030699994, 0.040599994, 0.050499998, 0.040599994,
            0.030699994, 0.020800006, 0.010900003, 0.001, 0.0059499955,
            0.010900003, 0.015849996, 0.020800006, 0.02575, 0.020800006,
            0.015849996, 0.010900003, 0.0059499955, 0.001, 0.0034749978,
            0.0059500015, 0.008424998, 0.010900003, 0.013375, 0.010900003,
            0.008424998, 0.0059500015, 0.0034749978, 0.001, 0.0022374988,
            0.0034749978, 0.0047125025, 0.0059500015, 0.0071875, 0.0059500015,
            0.0047125025
        ]

        # Indexing lr_gold (rather than zipping) keeps the failure loud if
        # more learning rates were recorded than there are expected values
        for i, observed_lr in enumerate(lr_list):
            assert np.isclose(lr_gold[i], observed_lr)
예제 #5
0
    def from_relevance_scorer(
        cls,
        feature_config: FeatureConfig,
        interaction_model: InteractionModel,
        model_config: dict,
        loss: RelevanceLossBase,
        metrics: List[Union[kmetrics.Metric, str]],
        optimizer: Optimizer,
        tfrecord_type: str,
        file_io: FileIO,
        model_file: Optional[str] = None,
        initialize_layers_dict: Optional[dict] = None,
        freeze_layers_list: Optional[list] = None,
        compile_keras_model: bool = False,
        output_name: str = "score",
        logger=None,
    ):
        """
        Create a RelevanceModel with default Scorer function
        constructed from an InteractionModel

        Parameters
        ----------
        feature_config : `FeatureConfig` object
            FeatureConfig object that defines the features to be loaded in the dataset
            and the preprocessing functions to be applied to each of them
        interaction_model : `InteractionModel` object
            InteractionModel object that converts input features into a
            dense feature representation
        model_config : dict
            Model configuration passed to the RelevanceScorer as `model_config`
        loss : `RelevanceLossBase` object
            Loss object defining the final activation layer and the loss function
        metrics : list
            List of keras Metric classes that will be used for evaluating the trained model
        optimizer : `Optimizer`
            Tensorflow keras optimizer to be used for training the model
        tfrecord_type : {"example", "sequence_example"}
            Type of the TFRecord protobuf message used for TFRecordDataset
        file_io : `FileIO` object
            file I/O handler objects for reading and writing data
        model_file : str, optional
            Path to pretrained model file to be loaded for evaluation or retraining
        initialize_layers_dict : dict, optional
            Dictionary of tensorflow layer names mapped to the path of pretrained weights
            Use this for transfer learning with pretrained weights
            When omitted, a new empty dict is used
        freeze_layers_list : list, optional
            List of model layer names to be frozen
            Use this for freezing pretrained weights from other ml4ir models
            When omitted, a new empty list is used
        compile_keras_model : bool, optional
            Whether the keras model loaded from disk should be compiled
            with loss, metrics and an optimizer
        output_name : str, optional
            Name of the output tensorflow node that captures the score
        logger : `Logger`, optional
            logging handler for status messages

        Returns
        -------
        RelevanceModel
            RelevanceModel object with a default scorer build with a custom
            InteractionModel

        Raises
        ------
        AssertionError
            If `interaction_model` is not an InteractionModel or `loss`
            is not a RelevanceLossBase
        """
        assert isinstance(interaction_model, InteractionModel)
        assert isinstance(loss, RelevanceLossBase)

        # Create fresh containers per call. `{}` / `[]` defaults in the
        # signature are evaluated once and would be shared by every model
        # built through this constructor.
        if initialize_layers_dict is None:
            initialize_layers_dict = {}
        if freeze_layers_list is None:
            freeze_layers_list = []

        # NOTE(review): feature_config, file_io and logger are not forwarded
        # to the scorer here - confirm that matches the RelevanceScorer
        # signature in this version of the project.
        scorer: ScorerBase = RelevanceScorer(
            model_config=model_config,
            interaction_model=interaction_model,
            loss=loss,
            output_name=output_name,
        )

        return cls(
            scorer=scorer,
            feature_config=feature_config,
            metrics=metrics,
            optimizer=optimizer,
            tfrecord_type=tfrecord_type,
            model_file=model_file,
            initialize_layers_dict=initialize_layers_dict,
            freeze_layers_list=freeze_layers_list,
            compile_keras_model=compile_keras_model,
            output_name=output_name,
            file_io=file_io,
            logger=logger,
        )
예제 #6
0
    def get_ranking_model(
        self,
        loss_key: str,
        metrics_keys: List,
        feature_config: FeatureConfig,
        feature_layer_keys_to_fns=None,
    ) -> RelevanceModel:
        """
        Creates RankingModel

        Parameters
        ----------
        loss_key : str
            Key passed to ``loss_factory.get_loss`` to pick the loss
        metrics_keys : list
            Keys passed one by one to ``metric_factory.get_metric``
        feature_config : `FeatureConfig`
            Feature configuration shared by the interaction model and
            the ranking model
        feature_layer_keys_to_fns : dict, optional
            Mapping forwarded to the UnivariateInteractionModel as
            ``feature_layer_keys_to_fns``. When omitted, a new empty dict
            is created for this call

        Returns
        -------
        RelevanceModel
            RankingModel built from the arguments and ``self.args``

        NOTE: Override this method to create custom loss, scorer, model objects
        """
        # Build a fresh dict per call. A `{}` default in the signature is
        # evaluated once and would be shared across all calls.
        if feature_layer_keys_to_fns is None:
            feature_layer_keys_to_fns = {}

        # Define interaction model
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=feature_config,
            feature_layer_keys_to_fns=feature_layer_keys_to_fns,
            tfrecord_type=self.args.tfrecord_type,
            max_sequence_size=self.args.max_sequence_size,
        )

        # Define loss object from loss key
        loss: RelevanceLossBase = loss_factory.get_loss(
            loss_key=loss_key, scoring_type=self.args.scoring_type)

        # Define scorer
        scorer: ScorerBase = RelevanceScorer.from_model_config_file(
            model_config_file=self.args.model_config,
            interaction_model=interaction_model,
            loss=loss,
            output_name=self.args.output_name,
        )

        # Define metrics objects from metrics keys
        metrics: List[Union[Type[Metric], str]] = [
            metric_factory.get_metric(metric_key=metric_key)
            for metric_key in metrics_keys
        ]

        # Define optimizer
        optimizer: Optimizer = get_optimizer(
            optimizer_key=self.args.optimizer_key,
            learning_rate=self.args.learning_rate,
            learning_rate_decay=self.args.learning_rate_decay,
            learning_rate_decay_steps=self.args.learning_rate_decay_steps,
            gradient_clip_value=self.args.gradient_clip_value,
        )

        # Combine the above to define a RelevanceModel
        relevance_model: RelevanceModel = RankingModel(
            feature_config=feature_config,
            tfrecord_type=self.args.tfrecord_type,
            scorer=scorer,
            metrics=metrics,
            optimizer=optimizer,
            model_file=self.args.model_file,
            compile_keras_model=self.args.compile_keras_model,
            output_name=self.args.output_name,
            logger=self.logger,
        )

        return relevance_model
예제 #7
0
    def test_reduce_lr_on_plateau_in_training_pipeline(self):
        """
        Test reduce lr on plateau

        Trains a RankingModel for ten epochs with the scheduler callback
        returned by ``define_scheduler_as_callback`` plus an LrCallback,
        then compares the learning rates the LrCallback returns from
        ``get_lr_reduce_on_plateau_list()`` against the expected values.
        """
        self.model_config_file = MODEL_CONFIG_REDUCE_LR_ON_PLATEAU

        # Join the path components explicitly so the log file lands in the
        # same ranklib directory that is used for `data_dir` below,
        # whether or not INPUT_DIR ends with a path separator.
        logger = logging_utils.setup_logging(
            reset=True,
            file_name=os.path.join(INPUT_DIR, "ranklib", "output_log.csv"),
            log_to_file=True,
        )

        io = LocalIO()
        feature_config = self.parse_config(
            TFRecordTypeKey.SEQUENCE_EXAMPLE,
            self.feature_config_yaml_convert_to_clicks,
            io,
        )
        model_config = io.read_yaml(self.model_config_file)

        dataset = RelevanceDataset(
            data_dir=INPUT_DIR + '/ranklib',
            data_format=DataFormatKey.RANKLIB,
            feature_config=feature_config,
            tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
            batch_size=32,
            file_io=io,
            preprocessing_keys_to_fns={},
            logger=logger,
            keep_additional_info=KEEP_ADDITIONAL_INFO,
            non_zero_features_only=NON_ZERO_FEATURES_ONLY,
            max_sequence_size=319,
        )

        # Define interaction model
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=feature_config,
            feature_layer_keys_to_fns={},
            tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
            max_sequence_size=319,
            file_io=io,
        )

        # Define loss object from loss key
        loss: RelevanceLossBase = loss_factory.get_loss(
            loss_key=LossKey.RANK_ONE_LISTNET, scoring_type=ScoringTypeKey.POINTWISE
        )

        # Define scorer
        scorer: ScorerBase = RelevanceScorer.from_model_config_file(
            model_config_file=self.model_config_file,
            interaction_model=interaction_model,
            loss=loss,
            logger=logger,
            file_io=io,
        )

        optimizer: Optimizer = get_optimizer(model_config=model_config)

        # Combine the above to define a RelevanceModel
        relevance_model: RelevanceModel = RankingModel(
            feature_config=feature_config,
            tfrecord_type=TFRecordTypeKey.SEQUENCE_EXAMPLE,
            scorer=scorer,
            optimizer=optimizer,
            model_file=None,
            file_io=io,
            logger=logger,
        )

        # Scheduler callback first, then the callback that collects the
        # learning rates checked below (same order as they are registered)
        scheduler_callback = relevance_model.define_scheduler_as_callback(None, model_config)
        my_callback_object = LrCallback()
        callback_list = [scheduler_callback, my_callback_object]

        # The fit() return value is not needed; only the callback is inspected
        relevance_model.model.fit(
            x=dataset.train.shard(2, 0),
            validation_data=dataset.validation.shard(2, 1),
            epochs=10,
            verbose=True,
            callbacks=callback_list,
        )
        lr_list = my_callback_object.get_lr_reduce_on_plateau_list()
        lr_gold = [50.0, 50.0, 25.0, 12.5, 6.25, 3.125, 1.5625, 1.0, 1.0, 1.0]

        assert np.all(np.isclose(lr_gold, lr_list))
예제 #8
0
파일: test_base.py 프로젝트: kiminh/ml4ir
    def get_ranking_model(
        self,
        loss_key: str,
        metrics_keys: List,
        feature_config: FeatureConfig,
        model_config: dict = None,
        feature_layer_keys_to_fns=None,
        initialize_layers_dict=None,
        freeze_layers_list=None,
    ) -> RelevanceModel:
        """
        Creates RankingModel

        Parameters
        ----------
        loss_key : str
            Key passed to ``loss_factory.get_loss`` to pick the loss
        metrics_keys : list
            Keys passed one by one to ``metric_factory.get_metric``
        feature_config : `FeatureConfig`
            Feature configuration shared by the interaction model, the
            scorer and the ranking model
        model_config : dict, optional
            NOTE(review): this argument is currently not read by the
            method. The scorer and the architecture check use
            ``self.model_config`` and the optimizer config is read from
            the file at ``self.args.model_config`` - confirm whether the
            argument is meant to take precedence.
        feature_layer_keys_to_fns : dict, optional
            Mapping forwarded to the UnivariateInteractionModel as
            ``feature_layer_keys_to_fns``. Defaults to a new empty dict
        initialize_layers_dict : dict, optional
            Forwarded to the ranking model as ``initialize_layers_dict``.
            Defaults to a new empty dict
        freeze_layers_list : list, optional
            Forwarded to the ranking model as ``freeze_layers_list``.
            Defaults to a new empty list

        Returns
        -------
        RelevanceModel
            LinearRankingModel when ``self.model_config["architecture_key"]``
            equals ``ArchitectureKey.LINEAR``, otherwise RankingModel

        NOTE: Override this method to create custom loss, scorer, model objects
        """
        # Create fresh containers per call. `{}` / `[]` defaults in the
        # signature are evaluated once and would be shared across all calls.
        if feature_layer_keys_to_fns is None:
            feature_layer_keys_to_fns = {}
        if initialize_layers_dict is None:
            initialize_layers_dict = {}
        if freeze_layers_list is None:
            freeze_layers_list = []

        # Define interaction model
        interaction_model: InteractionModel = UnivariateInteractionModel(
            feature_config=feature_config,
            feature_layer_keys_to_fns=feature_layer_keys_to_fns,
            tfrecord_type=self.args.tfrecord_type,
            max_sequence_size=self.args.max_sequence_size,
            file_io=self.file_io,
        )

        # Define loss object from loss key
        loss: RelevanceLossBase = loss_factory.get_loss(
            loss_key=loss_key, scoring_type=self.args.scoring_type)

        # Define scorer
        scorer: ScorerBase = RelevanceScorer(
            feature_config=feature_config,
            model_config=self.model_config,
            interaction_model=interaction_model,
            loss=loss,
            output_name=self.args.output_name,
            logger=self.logger,
            file_io=self.file_io,
        )

        # Define metrics objects from metrics keys
        metrics: List[Union[Type[Metric], str]] = [
            metric_factory.get_metric(metric_key=metric_key)
            for metric_key in metrics_keys
        ]

        # Define optimizer
        optimizer: Optimizer = get_optimizer(
            model_config=self.file_io.read_yaml(self.args.model_config), )

        # Combine the above to define a RelevanceModel
        if self.model_config["architecture_key"] == ArchitectureKey.LINEAR:
            RankingModelClass = LinearRankingModel
        else:
            RankingModelClass = RankingModel
        relevance_model: RelevanceModel = RankingModelClass(
            feature_config=feature_config,
            tfrecord_type=self.args.tfrecord_type,
            scorer=scorer,
            metrics=metrics,
            optimizer=optimizer,
            model_file=self.args.model_file,
            initialize_layers_dict=initialize_layers_dict,
            freeze_layers_list=freeze_layers_list,
            compile_keras_model=self.args.compile_keras_model,
            output_name=self.args.output_name,
            logger=self.logger,
            file_io=self.file_io,
        )

        return relevance_model