Example #1
def get_results(X):
    # `label`, `enc` and `pca` are fitted LabelEncoder, OneHotEncoder and PCA
    # objects assumed to live at module level (see the sketch after this example).
    categoricals = X.select_dtypes(include='object')
    categoricals = categoricals.astype(str)
    categoricals = categoricals.apply(label.fit_transform)
    label_encoding = categoricals['country']
    categoricals.drop(['country'], axis=1, inplace=True)
    X_one = enc.transform(categoricals)
    encoded_data = pd.DataFrame(X_one.todense())
    encoded_data.reset_index(drop=True, inplace=True)
    categoricals.reset_index(drop=True, inplace=True)

    original_numeric = X.select_dtypes(include='number')
    original_numeric.reset_index(drop=True, inplace=True)

    X = pd.concat([original_numeric, encoded_data, label_encoding],
                  axis=1).values
    Xp = pca.transform(X)

    clf = XGBClassifier()
    booster = Booster()
    booster.load_model('xgb.model')
    clf._Booster = booster
    classes = clf.predict_proba(Xp)
    y_pred = [0 if c[0] > 0.5 else 1 for c in classes]

    return y_pred
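For context, `get_results` assumes module-level `label`, `enc`, and `pca` objects fitted at training time. A minimal sketch of that fitting step, assuming the column layout mirrors the snippet above (the toy `X_train` below is illustrative only):

import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Toy stand-in for the real training frame
X_train = pd.DataFrame({'country': ['US', 'FR', 'US'],
                        'color': ['red', 'blue', 'red'],
                        'age': [31, 42, 27]})

label = LabelEncoder()
enc = OneHotEncoder(handle_unknown='ignore')
pca = PCA(n_components=2)  # assumed component count

cats = X_train.select_dtypes(include='object').astype(str)
cats = cats.apply(label.fit_transform)          # same per-column trick as above
country = cats['country']
enc.fit(cats.drop(['country'], axis=1))
onehot = pd.DataFrame(enc.transform(cats.drop(['country'], axis=1)).todense())
nums = X_train.select_dtypes(include='number').reset_index(drop=True)
pca.fit(pd.concat([nums, onehot, country], axis=1).values)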
Example #2
    def predict(self, booster: xgb.Booster, **kwargs):
        """
        Run local XGBoost prediction.

        Parameters
        ----------
        booster : xgboost.Booster
            A trained booster.
        **kwargs : dict
            Other parameters for `xgboost.Booster.predict`.

        Returns
        -------
        tuple
            Pair of IP address of caller and pandas.DataFrame
            with partial prediction result.
        """
        local_dpredict = self._dpredict
        booster.set_param({"nthread": self._nthreads})

        s = time.time()

        predictions = pandas.DataFrame(
            booster.predict(local_dpredict["dmatrix"], **kwargs),
            index=local_dpredict["index"],
        )
        LOGGER.info(f"Local prediction time: {time.time() - s} s")

        return get_node_ip_address(), predictions
Example #3
def load_saved_attributes():
    global model

    model = XGBRegressor()
    booster = Booster()
    booster.load_model('./ny_taxi_fare')
    model._Booster = booster
Example #4
    def analyze(self, event):
        array_list = [
            "lepJet_llpdnnx_-1_isLLP_QMU_QQMU",
            "lepJet_llpdnnx_0_isLLP_QMU_QQMU",
            "lepJet_llpdnnx_1_isLLP_QMU_QQMU",
            "lepJet_llpdnnx_2_isLLP_QMU_QQMU", "dimuon_mass", "dimuon_deltaR",
            "lepJet_pt", "lepJet_eta", "lepJet_deltaR", "MET_pt", "MET_phi",
            "looseMuons_pt", "looseMuons_eta", "looseMuons_dxy",
            "tightMuons_pt", "tightMuons_eta", "tightMuons_dxy"
        ]
        data = pd.DataFrame(data={
            "lepJet_llpdnnx_-1_isLLP_QMU_QQMU": getattr(event, "lepJet_llpdnnx_-1_isLLP_QMU_QQMU"),
            "lepJet_llpdnnx_0_isLLP_QMU_QQMU": event.lepJet_llpdnnx_0_isLLP_QMU_QQMU,
            "lepJet_llpdnnx_1_isLLP_QMU_QQMU": event.lepJet_llpdnnx_1_isLLP_QMU_QQMU,
            "lepJet_llpdnnx_2_isLLP_QMU_QQMU": event.lepJet_llpdnnx_2_isLLP_QMU_QQMU,
            "dimuon_mass": event.dimuon_mass,
            "dimuon_deltaR": event.dimuon_deltaR,
            "lepJet_pt": event.lepJet_pt,
            "lepJet_eta": event.lepJet_eta,
            "lepJet_deltaR": event.lepJet_deltaR,
            "MET_pt": event.MET_pt,
            "MET_phi": event.MET_phi,
            "looseMuons_pt": event.looseMuons_pt,
            "looseMuons_eta": event.looseMuons_eta,
            "looseMuons_dxy": event.looseMuons_dxy,
            "tightMuons_pt": event.tightMuons_pt,
            "tightMuons_eta": event.tightMuons_eta,
            "tightMuons_dxy": event.tightMuons_dxy,
        }, columns=array_list, index=[0])

        model = XGBClassifier()
        booster = Booster()
        #model._le = LabelEncoder().fit([1])
        booster.load_model(self.modelPath)
        booster.feature_names = array_list
        model._Booster = booster
        bdt_score = model.predict_proba(data)
        setattr(event, "bdt_score", bdt_score[:, 1])
        return True
Example #5
    def get_model(cls, algorithm_name: str, model_path: str):
        if algorithm_name == 'xgboost':
            model = xgb.XGBClassifier()
            booster = Booster()
            booster.load_model(model_path)
            model._Booster = booster
        else:
            model = joblib.load(model_path)
        return model
Example #6
def test(x, modelFile):
    model = Booster()                          # init model
    model.load_model(modelFile)                # load trained weights
    maps = np.load("Map.npy", allow_pickle=True)
    x_enc = encode([x])                        # `encode` is defined elsewhere
    y_enc = model.predict(DMatrix(x_enc))
    y_pred = np.argmax(y_enc)                  # most probable class index
    inverseMap = maps.item().get("inverseMap")
    y_hat = inverseMap[y_pred]                 # map index back to label
    print(y_hat)
Example #7
    def predict(self, booster: xgb.Booster, *args, **kwargs):
        local_dpredict = self._dpredict
        booster.set_param({"nthread": self._nthreads})

        s = time.time()
        predictions = [
            booster.predict(X, *args, **kwargs) for X in local_dpredict
        ]
        LOGGER.info(f"Local prediction time: {time.time() - s} s")
        return np.concatenate(predictions)
Example #8
def train_xgb(X, y, params, save_path=None, save_path_booster=None):

    # the threshold is not handled by XGB interface
    params, binary_threshold = _parse_param_and_delete(params,
                                                       'binary_threshold', .5)

    # n_jobs is handled by XGB SKL interface
    params = _parse_param_and_keep(params,
                                   name='n_jobs',
                                   default=min(max_cpu_count(), 24))

    X = np.asarray(X)
    y = np.asarray(y).flatten()

    if not tuple(np.sort(np.unique(y))) == (0, 1):
        raise NotImplementedError(
            'XGB Wrapper currently only supports binary classification.')

    # Fit the model
    model = XGBClassifier(use_label_encoder=False)
    model = clone(model)
    model.set_params(**params)

    logging.info('Training...')
    model.fit(
        X,
        y,
        # early_stopping_rounds=10,
        verbose=True,
    )
    # Save and re-load (feature-agnostic model)
    temp_file = f'temp-{time.time()}-{random.random()}.bin'
    model.get_booster().save_model(temp_file)
    booster = Booster(model_file=temp_file)
    os.remove(temp_file)

    if binary_threshold == 'auto':
        p_ = booster.predict(DMatrix(X))
        p_ = np.sort(p_)
        binary_threshold = p_[int((y == 0).sum())]

    logging.info(f'Using a binary_threshold = {binary_threshold}')

    # Wrap
    model = XGBClassifierSKLWrapper(booster,
                                    features=X.shape[1],
                                    threshold=binary_threshold)

    # Save
    if save_path is not None:
        save_pickle(model, save_path)
    if save_path_booster is not None:
        save_pickle(model.get_booster(), save_path_booster)
    return model
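Why the 'auto' threshold works: with scores sorted ascending, the value at index (#negatives) splits the predictions so that the fraction above the threshold roughly matches the training positive rate. A tiny self-contained check:

import numpy as np

scores = np.array([0.1, 0.2, 0.4, 0.7, 0.9])   # model scores
y = np.array([0, 0, 0, 1, 1])                  # 3 negatives, 2 positives
thr = np.sort(scores)[int((y == 0).sum())]     # -> 0.7, same rule as above
assert (scores >= thr).sum() == (y == 1).sum()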
Example #9
    def predict(self, booster: xgb.Booster, **kwargs):
        local_dpredict = self._dpredict
        booster.set_param({"nthread": self._nthreads})

        s = time.time()
        predictions = [
            pandas.DataFrame(booster.predict(X, **kwargs))
            for X in local_dpredict
        ]
        LOGGER.info(f"Local prediction time: {time.time() - s} s")
        return predictions if len(predictions) > 1 else predictions[0]
Example #10
def create_predictor_infos():
    word_index = {}
    n_tokens = 0
    with open('../data/output/word_frequency.pkl', 'rb') as f:
        word_frequency = cPickle.load(f)
        assert type(word_frequency) == dict
        for k, v in sorted(word_frequency.items(), key=lambda x: x[1]):
            if v > THRESHOLD_FREQ:
                word_index[k] = n_tokens
                n_tokens += 1
    bst = Booster()
    bst.load_model('../data/model/xgboost_model.model')
    return word_index, n_tokens, bst
Example #11
    def predict(self, booster: xgb.Booster, **kwargs):
        local_dpredict = self._dpredict
        booster.set_param({"nthread": self._nthreads})

        s = time.time()

        predictions = pandas.DataFrame(
            booster.predict(local_dpredict["dmatrix"], **kwargs),
            index=local_dpredict["index"],
        )
        LOGGER.info(f"Local prediction time: {time.time() - s} s")

        return get_node_ip_address(), predictions
Example #12
    def test_it_can_override_an_existing_one(self) -> None:
        station_id = uuid.uuid4()

        first = StationAvailabilityAlgorithm(
            station_id, DataFrame(['data_1', 'frame_1', 'test_1']), Booster())
        self._repository.save(first)

        override = StationAvailabilityAlgorithm(
            station_id, DataFrame(['data_2', 'frame_2', 'test_2']), Booster())
        self._repository.save(override)

        self.assertEqual(self._repository.find_by_station_id(station_id),
                         override)
Example #13
def xgb_latest() -> Tuple[Booster, Dict[str, pandas.Categorical]]:
    base_path = '/var/opt/pcsml/devel/training_data/dumps/debug004/2017-12-27T18-30-59'

    model = Booster()
    model.load_model(os.path.join(base_path, 'model_2017-12-27T18-30-59.xgb'))

    with gzip.open(
            os.path.join(
                base_path,
                'model_2017-12-27T18-30-59_column_categories.pickle.gz'),
            'rb') as f:
        column_categories = pickle.load(f)

    return model, column_categories
Example #14
def select_stocks(context, data):
    #clf = pickle.load(BytesIO(read_file('xgb_factors_model_ZZ800_D.model')))
    #file = read_file('xgb_factors_model_ZZ800_D.model')
    #clf = Booster.load_model(fname = BytesIO(read_file('xgb_factors_model_ZZ800_D.model')))
    with open('temp', 'wb') as f:
        f.write(read_file('xgb_factors.model'))  # write a temp file; cleaned up after the process ends
    clf = Booster(model_file='temp')
    #clf = Booster.load_model(fname = 'temp')
    industry_old_code = ['801010','801020','801030','801040','801050','801080','801110','801120','801130','801140','801150',\
                    '801160','801170','801180','801200','801210','801230']
    industry_new_code = ['801010','801020','801030','801040','801050','801080','801110','801120','801130','801140','801150',\
                    '801160','801170','801180','801200','801210','801230','801710','801720','801730','801740','801750',\
                   '801760','801770','801780','801790','801880','801890']
    starttime = datetime.datetime.now()
    date = context.previous_date
    # fetch industry factor data
    print('Data retrieval date:', date)
    '''
    if datetime.datetime.strptime(date,"%Y-%m-%d").date()<datetime.date(2014,2,21):
        industry_code=industry_old_code
    else:
    '''
    industry_code = industry_new_code
    stockList = get_stock('ZZ800', date)
    factor_origl_data = get_factor_data(stockList, date)
    factor_solve_data = data_preprocessing(factor_origl_data, stockList,
                                           industry_code, date)
    endtime = datetime.datetime.now()
    print('Data fetch runtime:', int((endtime - starttime).seconds / 60), 'minutes')
    test_feature_or = factor_solve_data.copy()
    test_feature = np.array(test_feature_or)
    # model prediction
    test_predict = clf.predict(DMatrix(test_feature_or))
    test_sample_predict = pd.DataFrame(data=test_predict,
                                       index=test_feature_or.index,
                                       columns=[
                                           'XGB_predict_0', 'XGB_predict_1',
                                           'XGB_predict_2', 'XGB_predict_3',
                                           'XGB_predict_4', 'XGB_predict_5',
                                           'XGB_predict_6', 'XGB_predict_7',
                                           'XGB_predict_8', 'XGB_predict_9',
                                           'XGB_predict_10', 'XGB_predict_11'
                                       ])
    #test_sample_predict['XGB_predict_0_and_1'] = test_sample_predict['XGB_predict_0'] + test_sample_predict['XGB_predict_1']
    test_sample_predict = test_sample_predict.sort_values(by='XGB_predict_0',
                                                          ascending=False)
    stock_list = test_sample_predict.index.values.tolist()
    stock_list = stock_list[:g.buy_stock_count]
    return stock_list
Example #15
def upload_xgb_to_memsql(xgb: Booster,
                         conn: Connection,
                         udf_name: str,
                         func=F.SIGMOID,
                         feature_names: List[str] = None,
                         allow_overwrite: bool = False) -> None:
    if feature_names:
        xgb.feature_names = feature_names
    trees = split_trees(xgb.trees_to_dataframe())
    sqls = [tree_to_func_def(udf_name, allow_overwrite, t) for t in trees]
    sqls.append(
        tree_to_main_func(udf_name, allow_overwrite, trees, xgb.feature_names,
                          func))
    for s in sqls:
        assert 1 == conn.query(s)
Example #16
    def test_model(self, model: xgb.XGBClassifier, df: pd.DataFrame) -> Tuple[str, float]:  # predict_proba needs the sklearn wrapper, not a raw Booster
        num_cols = [
            "shipping_free", "price", "accepts_mercadopago",
            "automatic_relist", "initial_quantity", "sold_quantity",
            "available_quantity", "quantity"
        ]

        cat_cols = [
            c for c in df.columns if c not in num_cols and c != "target"
        ]

        df = self.feature_engineer(df, cat_cols)

        features = [f for f in df.columns if f != "target"]

        for col in features:
            if col not in num_cols:
                lbl = LabelEncoder()
                lbl.fit(df[col])
                df.loc[:, col] = lbl.transform(df[col])

        X = df.drop(["target"], axis=1).values
        y = df.target.values

        preds = model.predict_proba(X)[:, 1]

        auc = metrics.roc_auc_score(y, preds)

        logger.info(f"AUC Test = {auc}")
        return "auc", auc  # match the declared (metric name, value) return type
Example #17
def load_or_create(objective='multi:softprob',
                   max_depth=2,
                   seed=4242,
                   eval_metric='merror',
                   num_class=4520,
                   num_feature=256,
                   **kwargs):

    # `from_scratch` is assumed to be a module-level flag
    if from_scratch:
        print_info('Creating XGB Boosted Tree')
        params = {
            'updater': 'grow_gpu',
            'predictor': 'gpu_predictor',
            'tree_method': 'gpu_hist',
            'eval_metric': eval_metric,
            'objective': objective,
            'num_class': num_class,
            'max_depth': max_depth,
            'seed': seed,
            'num_feature': num_feature
        }

        params = {**params, **kwargs}

        model = Booster(params)
    else:
        model = load_model()

    return model
Example #18
    def _run_xgboost(model: xgb.Booster, data: pd.DataFrame) -> pd.DataFrame:
        """Retrieve the win probability.

        Parameters
        ----------
        model : xgb.Booster
            The fitted XGBoost model.
        data : pd.DataFrame
            The input dataset to be evaluated.

        Returns
        -------
        pd.DataFrame
            The updated dataset.
        """
        # First, get the partial hazard values
        hazard = model.predict(
            xgb.DMatrix(data[META["static"] + META["dynamic"]]))
        # Get the cumulative probability
        c0 = interpolate_at_times(model.cumulative_hazard_,
                                  data["stop"].values)
        new = data.copy()
        new[META["survival"]] = 1 - np.exp(-(c0 * hazard))

        return new
Example #19
def xgbooster_predict_proba(booster: xgb.Booster,
                            d_x: xgb.DMatrix) -> np.ndarray:
    """ Simulate the `predict_proba` interface from sklearn
    
    This function will only work as expected if `booster` has been
    training using the `binary:logistic` loss.
    
    Parameters
    ----------
    booster : xgboost.Booster
        The trained booster
        
    d_x : xgboost.DMatrix
        The dataset
        
    Returns
    -------
    y_probas_pred : numpy.ndarray
        The probabilistic predictions; the shape of the array
        is (n_row, 2).
    """
    y_score = booster.predict(d_x)
    y_false = 1 - y_score
    size = (d_x.num_row(), 2)

    y_probas_pred = np.zeros(size)
    y_probas_pred[:, 0] = y_false
    y_probas_pred[:, 1] = y_score

    return y_probas_pred
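A short usage sketch with synthetic data (standard xgboost training API; shapes are illustrative):

import numpy as np
import xgboost as xgb

X = np.random.rand(100, 4)
y = (X[:, 0] > 0.5).astype(int)

dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=10)

y_probas = xgbooster_predict_proba(booster, xgb.DMatrix(X))
assert y_probas.shape == (100, 2)   # column 0: P(y=0), column 1: P(y=1)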
Example #20
def load_xgb_model(fname):
    """ Load an XGBoost model that was saved as a file with
        the HyperXGBClassifier.save method.

        The model spans two files:

            * The first file contains the model saved with the Booster class;
            this file has no extension.

            * The second file contains the parameters used to create the model;
            this file has the extension '.p'.

        Parameters
        ----------
        fname : path
                The file name without extension.
        """
    from xgboost import Booster
    with open(fname + '.p', 'rb') as f:
        params = pickle.load(f)
    n_classes = params['meta']['n_classes']
    param_map = params['param_map']
    model = HyperXGBClassifier(**param_map)
    model.set_n_labels(n_classes - 1)
    y = [i for i in range(n_classes)]
    model.set_le(y)
    model._Booster = Booster(model_file=fname)
    return model
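A hedged usage sketch (file names and `X_test` are illustrative; assumes HyperXGBClassifier.save('xgb_clf') previously wrote 'xgb_clf' and 'xgb_clf.p'):

clf = load_xgb_model('xgb_clf')
y_pred = clf.predict(X_test)   # X_test must match the training feature layout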
Example #21
    def get_feature_importances_from_booster(cls,
                                             booster: Booster) -> np.ndarray:
        """Gets feauture importances from a XGB booster.
            This is based on the feature_importance_ property defined in:
            https://github.com/dmlc/xgboost/blob/master/python-package/xgboost/sklearn.py

        Args:
            booster(Booster): Booster object;
            most of the time the median model (quantile=0.5) is preferred

        Returns:
            (np.ndarray) with normalized feature importances

        """

        # Get score
        score = booster.get_score(importance_type="gain")

        # Get feature names from booster
        feature_names = booster.feature_names

        # Get importance
        feature_importance = [score.get(f, 0.0) for f in feature_names]
        # Convert to array
        features_importance_array = np.array(feature_importance,
                                             dtype=np.float32)

        total = features_importance_array.sum()  # For normalizing
        if total == 0:
            return features_importance_array
        return features_importance_array / total  # Normalize
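A usage sketch; the enclosing class is not shown above, so `QuantileModel` below is a purely illustrative stand-in for whatever class defines this classmethod:

import numpy as np
import xgboost as xgb

X = np.random.rand(200, 3)
y = 2.0 * X[:, 0] + 0.5 * X[:, 1]
dtrain = xgb.DMatrix(X, label=y, feature_names=["a", "b", "c"])
booster = xgb.train({"objective": "reg:squarederror"}, dtrain, num_boost_round=10)

imp = QuantileModel.get_feature_importances_from_booster(booster)  # hypothetical class name
print(dict(zip(booster.feature_names, np.round(imp, 3))))          # normalized gain per feature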
Example #22
    def test_it_can_not_find_one_by_station_id_if_it_does_not_exist(
            self) -> None:
        station_availability_algorithm = StationAvailabilityAlgorithm(
            uuid.uuid4(), DataFrame(['data', 'frame', 'test']), Booster())

        self._repository.save(station_availability_algorithm)

        self.assertIsNone(self._repository.find_by_station_id(uuid.uuid4()))
Example #23
    def after_iteration(
        self, model: xgb.Booster,
        epoch: int,
        evals_log: xgb.callback.TrainingCallback.EvalsLog
    ):
        # fraction of predictions that fall inside the [y_lower, y_upper] band
        y_pred = model.predict(dmat)
        acc = np.sum(np.logical_and(y_pred >= y_lower, y_pred <= y_upper)) / len(X)
        acc_rec.append(acc)
        return False
Example #24
    def predict(self, model: xgb.Booster, data: RayDMatrix, **kwargs):
        _set_omp_num_threads()

        if data not in self._data:
            self.load_data(data)
        local_data = self._data[data]

        predictions = model.predict(local_data, **kwargs)
        return predictions
Example #25
def merge_labeled_weight_importance(model: Booster, label_encoder: OneHotLabelEncoder) -> Dict:
    f_imp = model.get_score(importance_type='weight')

    merged: Dict[str, int] = {}
    for f in f_imp:
        src_feature = label_encoder.source_column(f)
        merged[src_feature] = merged.get(src_feature, 0) + f_imp[f]

    return merged
Example #26
    def test_it_can_find_one_by_station_id(self) -> None:
        station_id = uuid.uuid4()
        station_availability_algorithm = StationAvailabilityAlgorithm(
            station_id, DataFrame(['data', 'frame', 'test']), Booster())

        self._repository.save(station_availability_algorithm)

        self.assertEqual(self._repository.find_by_station_id(station_id),
                         station_availability_algorithm)
Example #27
def merge_labeled_weight_importance(
        model: Booster,
        dummy_col_sep=categorical_util.DUMMY_COL_SEP) -> Dict[str, int]:
    f_imp = model.get_score(importance_type='weight')

    merged: Dict[str, int] = {}
    for f in f_imp:
        src_feature = categorical_util.get_source_name_from_dummy(
            f, dummy_col_sep)
        merged[src_feature] = merged.get(src_feature, 0) + f_imp[f]

    return merged
Example #28
def load_saved_attributes():

    global host_response_time_values
    global neighbourhood_values
    global property_type_values
    global room_type_values
    global cancellation_policy_values
    global model

    with open("columns.json", "r") as f:
        resp = json.load(f)
        host_response_time_values = resp["host_response_time"]
        neighbourhood_values = resp["neighbourhood"]
        property_type_values = resp["property_type"]
        room_type_values = resp["room_type"]
        cancellation_policy_values = resp["cancellation_policy"]

    model = XGBRegressor()
    booster = Booster()
    booster.load_model('airbnb_price_predictor')
    model._Booster = booster
Example #29
def load(filename):
    '''
    Loads in an xgboost model from the given file location
    Parameters
    ----------
    filename : string
        path of model file to be loaded
    Returns
    -------
    booster : xgboost.Booster()
        model that is loaded
    metadata : dict
        parameter metadata for model in the form of json data.
        Use get_params() function to use in model prediction.
    '''
    booster = Booster({'nthread': 4})
    # Check if the model file exists as written by the user.
    # If not, prepend 'model_' to the filename as designated in save().
    if not path.exists(filename):
        model_file = filename.split('/')
        model_file[-1] = 'model_' + model_file[-1]
        model_file = '/'.join(model_file)
    else:
        model_file = filename
    config_file = model_file.replace('model_', 'config_')
    booster.load_model(model_file)
    with open(config_file, 'r', encoding='utf-8') as f:
        config = f.read()
    # Booster.load_config expects the raw JSON string, not a parsed dict
    booster.load_config(config)
    metadata = json.loads(config)
    return booster, metadata
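A hedged usage sketch (file names are illustrative; assumes the matching save() wrote 'model_mymodel' and 'config_mymodel'):

import numpy as np
from xgboost import DMatrix

booster, metadata = load('mymodel')                 # resolves to 'model_mymodel'
X_new = np.random.rand(5, booster.num_features())   # num_features() per current xgboost API
preds = booster.predict(DMatrix(X_new))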
Example #30
    def from_model(
        cls,
        booster: xgboost.Booster,
        *,
        path: os.PathLike,
        preprocessor: Optional["Preprocessor"] = None,
    ) -> "XGBoostCheckpoint":
        """Create a :py:class:`~ray.air.checkpoint.Checkpoint` that stores an XGBoost
        model.

        Args:
            booster: The XGBoost model to store in the checkpoint.
            path: The directory where the checkpoint will be stored.
            preprocessor: A fitted preprocessor to be applied before inference.

        Returns:
            An :py:class:`XGBoostCheckpoint` containing the specified ``booster``.

        Examples:
            >>> from ray.train.xgboost import XGBoostCheckpoint
            >>> import xgboost
            >>>
            >>> booster = xgboost.Booster()
            >>> checkpoint = XGBoostCheckpoint.from_model(booster, path=".")  # doctest: +SKIP # noqa: E501

            You can use a :py:class:`XGBoostCheckpoint` to create an
            :py:class:`~ray.train.xgboost.XGBoostPredictor` and perform inference.

            >>> from ray.train.xgboost import XGBoostPredictor
            >>>
            >>> predictor = XGBoostPredictor.from_checkpoint(checkpoint)  # doctest: +SKIP # noqa: E501
        """
        booster.save_model(os.path.join(path, MODEL_KEY))

        if preprocessor:
            save_preprocessor_to_dir(preprocessor, path)

        checkpoint = cls.from_directory(path)

        return checkpoint
Example #31
def to_air_checkpoint(
    path: str,
    booster: xgboost.Booster,
    preprocessor: Optional["Preprocessor"] = None,
) -> Checkpoint:
    """Convert a pretrained model to AIR checkpoint for serve or inference.

    Args:
        path: The directory path where the model and preprocessor steps are stored.
        booster: A pretrained xgboost model.
        preprocessor: A fitted preprocessor. The preprocessing logic will
            be applied at serve/inference time.
    Returns:
        A Ray Air checkpoint.
    """
    booster.save_model(os.path.join(path, MODEL_KEY))

    if preprocessor:
        save_preprocessor_to_dir(preprocessor, path)

    checkpoint = Checkpoint.from_directory(path)

    return checkpoint
Example #32
def my_train_xgboost(params, dtrain, num_boost_round=10, evals=(), obj=None,
                     feval=None, early_stopping_rounds=None, seed=0, 
                     rt_eta=1.0006, rt_ssp=1.0006, rt_clb=1.0006, 
                     rt_dpt=1.0001):
    """
    Train a booster with given parameters.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : DMatrix
        Data to be trained.
    num_boost_round: int
        Number of boosting iterations.
    evals : list of pairs (DMatrix, string)
        List of items to be evaluated during training; this allows the user to
        watch performance on the validation set.
    obj : function
        Customized objective function.
    feval : function
        Customized evaluation function.
    early_stopping_rounds: int
        Activates early stopping. Validation error needs to decrease at least
        every <early_stopping_rounds> round(s) to continue training.
        Requires at least one item in evals.
        If there's more than one, will use the last.
        Returns the model from the last iteration (not the best one).
        If early stopping occurs, the model will have two additional fields:
        bst.best_score and bst.best_iteration.
    rt_eta, rt_ssp, rt_clb, rt_dpt : float
        Per-round multiplicative adjustment factors for eta, subsample and
        colsample_bytree (note: rt_ssp and rt_clb are currently overridden
        by random draws inside the function body).

    Returns
    -------
    booster : a trained booster model
    """
    eta = params['eta']
    ssp = params['subsample']
    clb = params['colsample_bytree']

    # NOTE: the rt_ssp / rt_clb arguments are overridden by random draws here
#    rt_eta = np.random.random()
    rt_ssp = np.random.uniform(0.1, 0.9)
    rt_clb = np.random.uniform(0.1, 0.9)


    evals = list(evals)
    # Booster() takes no `seed` argument; pass the seed through set_param instead
    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    bst.set_param({'seed': seed})

    if not early_stopping_rounds:
        for i in range(num_boost_round):
            bst.set_param({'eta': eta})
            bst.set_param({'subsample': ssp})
            bst.set_param({'colsample_bytree': clb})
            eta = eta * rt_eta
#            ssp = ssp * rt_ssp
#            clb = clb * rt_clb
            ssp = rt_ssp
            clb = rt_clb
            bst.update(dtrain, i, obj)
            if len(evals) != 0:
                bst_eval_set = bst.eval_set(evals, i, feval)
                if isinstance(bst_eval_set, string_types):
                    sys.stderr.write(bst_eval_set + '\n')
                else:
                    sys.stderr.write(bst_eval_set.decode() + '\n')
        return bst

    else:
        # early stopping

        if len(evals) < 1:
            raise ValueError('For early stopping you need at least one set in evals.')

        sys.stderr.write("Will train until {} error hasn't decreased in {} rounds.\n".format(evals[-1][1], early_stopping_rounds))

        # is params a list of tuples? are we using multiple eval metrics?
        if type(params) == list:
            if len(params) != len(dict(params).items()):
                raise ValueError('Check your params. Early stopping works with single eval metric only.')
            params = dict(params)

        # either minimize loss or maximize AUC/MAP/NDCG
        maximize_score = False
        if 'eval_metric' in params:
            maximize_metrics = ('auc', 'map', 'ndcg')
            # `filter()` is always truthy on Python 3; use any() for the intended check
            if any(params['eval_metric'].startswith(x) for x in maximize_metrics):
                maximize_score = True

        if maximize_score:
            best_score = 0.0
        else:
            best_score = float('inf')

        best_msg = ''
        best_score_i = 0

        for i in range(num_boost_round):
            bst.set_param({'eta': eta})
            bst.set_param({'subsample': ssp})
            bst.set_param({'colsample_bytree': clb})
            eta = eta * rt_eta
#            ssp = ssp * rt_ssp
#            clb = clb * rt_clb
            ssp = rt_ssp
            clb = rt_clb
            bst.update(dtrain, i, obj)
            bst_eval_set = bst.eval_set(evals, i, feval)

            if isinstance(bst_eval_set, string_types):
                msg = bst_eval_set
            else:
                msg = bst_eval_set.decode()

            sys.stderr.write(msg + '\n')

            score = float(msg.rsplit(':', 1)[1])
            if (maximize_score and score > best_score) or \
                    (not maximize_score and score < best_score):
                best_score = score
                best_score_i = i
                best_msg = msg
            elif i - best_score_i >= early_stopping_rounds:
                sys.stderr.write("Stopping. Best iteration:\n{}\n\n".format(best_msg))
                bst.best_score = best_score
                bst.best_iteration = best_score_i
                return bst

        return bst
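A minimal call sketch for my_train_xgboost with synthetic data (params must include eta, subsample and colsample_bytree, which the function reads directly):

import numpy as np
from xgboost import DMatrix

X = np.random.rand(200, 5)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)
dtrain = DMatrix(X[:150], label=y[:150])
dvalid = DMatrix(X[150:], label=y[150:])

params = {'eta': 0.1, 'subsample': 0.8, 'colsample_bytree': 0.8,
          'objective': 'binary:logistic', 'eval_metric': 'error'}
bst = my_train_xgboost(params, dtrain, num_boost_round=20,
                       evals=[(dvalid, 'valid')], early_stopping_rounds=5)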