예제 #1
0
 def write_binaries(rows):
     """Write an ``.xgb`` companion file for every split path in ``rows``.

     ``rows`` is an iterable of two-element pairs. The first element is
     ignored; the second is a dict from split name (train/test) to the path
     the data can be found at. For each input path the output is written to
     that same path with ``.xgb`` appended.
     """
     for _, splits in rows:
         # Inputs are resolved to local paths first, outputs are opened second.
         # NOTE(review): with_query=True presumably exposes the query sidecar
         # next to each input -- confirm against as_local_paths.
         with as_local_paths(splits.values(), with_query=True) as sources:
             destinations = [path + '.xgb' for path in splits.values()]
             with as_output_files(destinations) as sinks:
                 # Inputs and outputs are both derived from splits.values(),
                 # so pairing them positionally matches each input with its
                 # own '.xgb' destination.
                 for source, sink in zip(sources, sinks):
                     write_xgb(source, sink.name)
예제 #2
0
    def trainWithFiles(fold: Mapping[str, str], train_matrix: str,
                       params: Mapping[str, Any], **kwargs) -> 'XGBoostModel':
        """Wrapper around xgb.train for feature matrices stored in files

        Every path in ``fold`` is resolved to a local path, loaded as an
        ``xgb.DMatrix`` and passed to ``xgb.train`` as an evaluation set. The
        split named by ``train_matrix`` is additionally used as the training
        matrix.

        Parameters
        ----------
        fold :
            Map from split name to data path. All provided splits will be
            evaluated on each boosting iteration.
        train_matrix: str
            name of split in fold to train against
        params : dict
            XGBoost training parameters
        **kwargs
            Forwarded unchanged to ``xgb.train``. Must not contain ``evals``
            or ``evals_result``; both are supplied here and a duplicate
            keyword raises TypeError.

        Returns
        -------
        mjolnir.training.xgboost.XGBoostModel
            trained xgboost ranking model

        Raises
        ------
        KeyError
            If ``train_matrix`` is not a split name in ``fold``. Raised before
            any data is localized or loaded.
        """
        # Fail fast: without this check a bad split name is only noticed after
        # every matrix has been fetched and parsed.
        if train_matrix not in fold:
            raise KeyError(train_matrix)

        with as_local_paths(fold.values()) as local_paths:
            # local_paths is paired positionally with fold's keys; keys() and
            # values() of the same mapping iterate in the same order.
            matrices = {
                name: xgb.DMatrix(path)
                for name, path in zip(fold.keys(), local_paths)
            }
            dtrain = matrices[train_matrix]
            # xgb.train expects (DMatrix, name) tuples. The training split is
            # included so its metrics are reported alongside the others.
            evallist = [(dmat, name) for name, dmat in matrices.items()]
            # Filled in place by xgb.train with the per-iteration metrics.
            metrics = cast(Mapping, {})
            # Training runs inside the context manager.
            # NOTE(review): the local paths are presumably only valid while
            # the context is open -- confirm against as_local_paths.
            booster = xgb.train(params,
                                dtrain,
                                evals=evallist,
                                evals_result=metrics,
                                **kwargs)
            return XGBoostModel(booster, metrics, params)