class XGB(BaseModel):
    def __init__(self):
        self.clf = XGBClassifier(
            n_estimators=200,
            max_depth=20,
            learning_rate=0.1,
            random_state=0,
            booster="gbtree",
            use_label_encoder=False,
        )

    def train(self, X_train, Y_train):
        X_train, Y_train = do_rebalance(X_train, Y_train)
        self.clf.fit(X_train, Y_train)

    def test(self, X_test, Y_test):
        Y_prob = self.clf.predict_proba(X_test)
        auc = metrics.roc_auc_score(Y_test, Y_prob[:, 1])
        return auc  # the original computed the AUC but never returned it

    def predict(self, X):
        Y_prob = self.clf.predict_proba(X)
        return Y_prob

    def load_model(self, model_path):
        self.clf.load_model(model_path)
        # with open(model_path, "rb+") as file:
        #     self.clf = pickle.load(file)

    def save_model(self, model_path):
        self.clf.save_model(model_path)
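The train method above assumes a do_rebalance helper that is not shown. A minimal sketch of what such a helper might look like, using random oversampling of the minority class (the name and the strategy are assumptions, not the original implementation):

import numpy as np

def do_rebalance(X, y):
    # Hypothetical helper: randomly oversample the minority class
    # until both classes have the same number of rows.
    X, y = np.asarray(X), np.asarray(y)
    classes, counts = np.unique(y, return_counts=True)
    minority = classes[np.argmin(counts)]
    deficit = counts.max() - counts.min()
    idx = np.random.choice(np.where(y == minority)[0], size=deficit, replace=True)
    return np.concatenate([X, X[idx]]), np.concatenate([y, y[idx]])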
class WrappedXGBClassifier(WrappedModel):
    def base_init_finished(self):
        self.reset()

    def fit(self, X, y):
        self._value.fit(X, y, **self._fit_kwargs)
        return self

    def reset(self):
        from xgboost import XGBClassifier
        self._value = XGBClassifier(**self._init_kwargs)

    def predict(self, X):
        return self._value.predict(X)

    def predict_proba(self, X):
        if self._pos_index is None:
            raise Exception('predict_proba needs pos_index')
        return self._value.predict_proba(X)[:, self._pos_index]

    def dump(self, dirpath, name):
        # was self.value, which does not exist; the attribute is self._value
        self._value.save_model(pathjoin(dirpath, name + '.bin'))
        return self

    def load(self, dirpath, name):
        self._value.load_model(pathjoin(dirpath, name + '.bin'))
        return self
class XGBoost_Ranker():
    def __init__(self, timestamp, load=True):
        self.model = XGBClassifier()
        self.model.load_model(timestamp + '.file')
        self.factor = 1.0

    def set_factor(self, factor):
        self.factor = factor

    def rank_features(self, features):
        # Scale selected feature columns by the configured factor.
        _features = np.copy(features)
        for f in _features:
            f[1] *= self.factor
            f[4] *= self.factor
            f[5] *= self.factor
        # return np.array([0, 1, 2, 3, 4])

        # Build pairwise inputs: one row for every ordered pair (i, j), i != j.
        test_x = []
        for i in range(len(_features)):
            for j in range(len(_features)):
                if i == j:
                    continue
                test_x.append(
                    np.concatenate((_features[i], _features[j]), axis=0))
        test_x = np.array(test_x)
        print(test_x.shape)
        # Each row of y holds the pairwise wins for one candidate.
        y = self.model.predict(test_x).reshape(len(_features),
                                               len(_features) - 1)
        y = np.sum(y, axis=1)
        # print(y)
        return np.argsort(y)[::-1]  # indices sorted by descending win count
class StabilityClassifier():
    def __init__(self, modelfile='spock.json'):
        pwd = os.path.dirname(__file__)
        self.model = XGBClassifier()
        self.model.load_model(pwd + '/models/' + modelfile)

    def check_errors(self, sim):
        if sim.N_real < 4:
            raise AttributeError("SPOCK Error: SPOCK only applicable to systems with 3 or more planets")

    def predict_stable(self, sim):
        triofeatures, stable = self.generate_features(sim)
        if not stable:
            return 0
        trioprobs = self.predict_from_features(triofeatures)
        return trioprobs.min()  # minimum probability among all trios tested

    def generate_features(self, sim):
        sim = sim.copy()
        init_sim_parameters(sim)
        self.check_errors(sim)
        trios = [[i, i+1, i+2] for i in range(1, sim.N_real - 2)]  # list of adjacent trios
        featureargs = [10000, 80, trios]
        triofeatures, stable = features(sim, featureargs)
        return triofeatures, stable

    def predict_from_features(self, triofeatures):
        # The XGBoost model expects a 2D array of shape (Npred, Nfeatures),
        # where Npred is the number of samples to predict and Nfeatures is
        # the number of features per sample.
        featurevals = np.array([[val for val in trio.values()]
                                for trio in triofeatures])
        # Take the 2nd column: probability of belonging to the stable class.
        return self.model.predict_proba(featurevals)[:, 1]
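A minimal usage sketch for the classifier above, assuming rebound is installed and the SPOCK model file is in place; the planet masses and periods are illustrative only:

import rebound

sim = rebound.Simulation()
sim.add(m=1.0)              # central star
sim.add(m=1e-5, P=1.0)      # three planets in a compact chain
sim.add(m=1e-5, P=1.3)
sim.add(m=1e-5, P=1.6)

clf = StabilityClassifier()
print(clf.predict_stable(sim))  # estimated probability of stability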
def load_modele(path):
    '''Return the model as an object from the given path.'''
    if 'GradientBoosting' in str(path):
        # print('Loading XGBoost')
        model = XGBClassifier()
        model.load_model(path)
        return model
    else:
        # print('Loading via pickle')
        return pickle.load(open(path, 'rb'))
def export_model(amnt_data, client):
    model = XGBClassifier()
    model.load_model('boa.model')
    with open('model.pb', 'wb') as output:
        pickle.dump([model, amnt_data], output)
    bucket_test = client.get_bucket('traina-data')
    blob_test = bucket_test.blob('model.pb')
    blob_test.upload_from_filename(filename='model.pb')
    os.remove('model.pb')
    print(Fore.GREEN + 'Exported Model Successfully')
    return
def get_model():
    param_path = os.path.join(STORAGE, "params.json")
    with open(param_path, "r") as f:
        json_data = f.read()
    params = dict(json.loads(json_data))
    model = XGBClassifier(**params)
    model_path = os.path.join(STORAGE, "model.xgb")
    model.load_model(model_path)
    return model
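get_model assumes a params.json and a model.xgb were written under STORAGE at training time. A sketch of the matching save step (the save_model helper name is hypothetical; the layout is taken from the loader above):

import json
import os

def save_model(model, params):
    # Persist hyperparameters and booster side by side so get_model()
    # can reconstruct the classifier later.
    with open(os.path.join(STORAGE, "params.json"), "w") as f:
        json.dump(params, f)
    model.save_model(os.path.join(STORAGE, "model.xgb"))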
class ProcessPlugin(WorkerPlugin):
    def __init__(self,
                 cfg_path=os.environ.get("MODEL_CONFIG"),
                 weights_path=os.environ.get("PP_WEIGHTS_PTH"),
                 classes_path=os.environ.get("CLASSES_PTH")):
        self.cfg_path = cfg_path
        postprocess_weights_pth = weights_path
        self.postprocess_model = XGBClassifier()
        self.postprocess_model.load_model(postprocess_weights_pth)
        self.classes_pth = classes_path
        with open(self.classes_pth) as stream:
            # safe_load avoids the unsafe default loader and the deprecation
            # warning from calling yaml.load without an explicit Loader.
            self.classes = yaml.safe_load(stream)["CLASSES"]
def train_lazy():
    # Load the dataset
    X, y = load_data()
    # Split the data
    X_train, X_val, y_train, y_val = split_dataset(X, y)
    # Normalize
    X_train = normalize(X_train)
    X_val = normalize(X_val)

    # Uncomment to check the performance of the 25 LazyPredict models:
    # clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
    # scores, _ = clf.fit(X_train, X_val, y_train, y_val)
    # print(scores)

    # Final model: reuse a saved model if one exists
    if os.path.isfile(config.MODEL_PATH):
        model = XGBClassifier()
        model.load_model(config.MODEL_PATH)
    else:
        model = XGBClassifier()
        model.fit(X_train, y_train,
                  eval_metric="error",
                  eval_set=[(X_train, y_train), (X_val, y_val)],
                  verbose=True)
        # save model
        model.save_model(config.MODEL_PATH)

    # Performance on the train set
    y_pred = model.predict(X_train)
    print_performance(y_train, y_pred, 'train')

    # Performance on the validation set
    y_pred = model.predict(X_val)
    print_performance(y_val, y_pred, 'val')

    # Load, normalize and evaluate the test set
    X_test, y_test = load_test_data()
    X_test = normalize(X_test)
    y_pred = model.predict(X_test)
    print_performance(y_test, y_pred, 'test')

    plot_performance(model)
def predict_probability_of_winning(gold_diff_at_10, exp_diff_at_10, team):
    dirname = os.path.dirname(__file__)
    model = XGBClassifier()
    model.load_model(os.path.join(dirname, f'./models/{team}_model.json'))
    # Values are wrapped in lists because the DataFrame constructor
    # doesn't allow scalars.
    df = pd.DataFrame({
        team + 'GoldDiff': [gold_diff_at_10],
        team + 'ExperienceDiff': [exp_diff_at_10]
    })
    predicts = model.predict_proba(df)
    for i, col in enumerate(['redWin', 'blueWin']):
        df[col] = predicts[:, i]
    return df
def generate_shap_html(feature, user_bin, user_id):
    xgb_clf = XGBClassifier()
    xgb_clf.load_model(os.path.join(MODEL_DIRECTORY, "xgb.model"))
    explainer = shap.TreeExplainer(xgb_clf)
    values = explainer.shap_values(feature)
    shap.initjs()
    fp = shap.force_plot(explainer.expected_value[user_bin - 1],
                         values[user_bin - 1][0],
                         feature,
                         show=False)
    html_path = os.path.join(MODEL_DIRECTORY, f"User_{user_id}.html")
    shap.save_html(html_path, fp)
    with open(html_path, "r", encoding='utf-8') as f:
        html = f.read()
    os.remove(html_path)
    return str(html), values
def load_model_and_generate_evaluation_images(*, model_filename, input_path,
                                              output_path, feature_names):
    model = XGBClassifier()
    model.load_model(model_filename)
    frame_folders = sorted(get_frame_folders(input_path))
    for frame_folder in frame_folders:
        frame_path = os.path.join(input_path, frame_folder)
        segment_names = [
            name for name in os.listdir(frame_path) if name[1].isdigit()
        ]
        if len(segment_names) != 0:
            continue  # skip frames that already have segment files
        for camera_name in ["60", "180", "300"]:
            image_name = "camera" + camera_name + ".png"
            print(frame_path + "/" + image_name)
            image_bgr = cv.imread(os.path.join(frame_path, image_name))
            features, shape = create_features(image_bgr=image_bgr, flatten=True)
            X = pd.DataFrame(features)[feature_names]
            y = model.predict(X)
            segments = y.reshape(shape)
            segments_bgr = [class2bgr(idx) for idx in segments.flatten()]
            segments_bgr = np.array(segments_bgr).reshape(*shape, 3)
            path = os.path.join(output_path, frame_folder)
            if not os.path.exists(path):
                os.makedirs(path)
            image_and_segments_bgr = np.concatenate([image_bgr, segments_bgr],
                                                    axis=1)
            # cv.imwrite(filename=os.path.join(path, image_name),
            #            img=image_bgr)
            segments_filename = "camera" + camera_name + "_segments" + ".png"
            cv.imwrite(
                filename=os.path.join(path, segments_filename),
                img=image_and_segments_bgr,
            )
def load_model_and_generate_evaluation_images(
    model_filename,
    input_path: pathlib.Path,
    output_path: pathlib.Path,
    feature_names,
):
    model = XGBClassifier()
    model.load_model(model_filename)
    for frame_folder in sorted(get_subdirectories(input_path)):
        segment_names = [
            f.name for f in frame_folder.iterdir()
            if f.is_file() and f.name[1].isdigit()
        ]
        if len(segment_names) != 0:
            continue  # skip frames that already have segment files
        for camera_name in ["60", "180", "300"]:
            image_name = "camera" + camera_name + ".png"
            print(frame_folder / image_name)
            image_bgr = cv.imread(str(frame_folder / image_name))
            features, shape = create_features(image_bgr=image_bgr, flatten=True)
            X = pd.DataFrame(features)[feature_names]
            y = model.predict(X)
            segments = y.reshape(shape)
            segments_bgr = [class2bgr(idx) for idx in segments.flatten()]
            segments_bgr = np.array(segments_bgr).reshape(*shape, 3)
            path = output_path / frame_folder.name
            if not path.exists():
                path.mkdir(parents=True)
            image_and_segments_bgr = np.concatenate([image_bgr, segments_bgr],
                                                    axis=1)
            segments_filename = "camera" + camera_name + "_segments" + ".png"
            cv.imwrite(
                filename=str(path / segments_filename),
                img=image_and_segments_bgr,
            )
def predict(inputs):
    main_category, category, goal, country, currency, today = inputs

    # Encode categorical inputs
    encoder = load(ENCODER_PATH)
    inputs2enc = np.array([category, main_category, currency,
                           country]).reshape(1, -1)
    inputs_encoded = encoder.transform(inputs2enc)

    # Stack numericals and encoded categoricals
    numericals = np.array([goal, today.day, today.month]).reshape(1, -1)
    final_inputs = np.hstack([numericals, inputs_encoded]).astype(np.float32)

    # Load model and predict
    model = XGBClassifier(seed=42)
    model.load_model(MODEL_PATH)
    result = model.predict_proba(final_inputs)
    return result
def predict(age, count, diagnosis, fpath='static/model/HFEA_model_{}', nmodels=5):
    """
    Loads and predicts from the models.

    Parameters:
    -----------
    age : int,
        Age of the patient in years.
    count : int,
        The number of Oocytes (eggs) collected following the treatment.
    diagnosis : str,
        The patient's infertility diagnosis, must be one of Ovulatory
        disorder, Male factor, Endometriosis or Unexplained.
    fpath : str (default='static/model/HFEA_model_{}'),
        Path template for the models.
    nmodels : int (default=5),
        The number of models (i.e., the number of folds used in the
        cross-validation procedure during training).

    Returns:
    --------
    pred : float,
        The predicted probability, averaged over the nmodels fold models.
    """
    age_group = map_age_to_age_group(age)
    # add the four infertility diagnosis features
    infertility = create_infertility_feature(diagnosis)
    X = np.r_[[age_group, count], infertility]
    pred = 0
    # The original hardcoded range(5) and the model path, ignoring the
    # nmodels and fpath parameters.
    for i in range(nmodels):
        clf = XGBClassifier()
        clf.load_model(fpath.format(i))
        pred += clf.predict_proba(X.reshape((1, -1)))[:, 1][0] / nmodels
    return pred
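A usage sketch for the averaged predictor above, assuming the five fold models exist under static/model/; the argument values are illustrative:

# Average the predicted probability across the cross-validation models
# for a 34-year-old patient with 8 oocytes collected.
p = predict(age=34, count=8, diagnosis='Unexplained')
print(f"Predicted probability: {p:.3f}")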
def predict(name, match_analysis_num, api_key):
    # Load model and data
    ss = joblib.load("model/standard_scaler.pkl")
    xgb = XGBClassifier()
    xgb.load_model("model/LOL_predict_xgb.bst")
    match_df, player_stat, game_minute, win_label = datapipe.collect_predict_data_by_name(
        name, match_analysis_num, api_key)
    del player_stat
    gc.collect()  # was gc.collect without parentheses, which never ran
    if win_label[0] == -1:
        return -1, -1
    elif win_label[0] == -404:
        return -404, -404
    else:
        # Predict the win rate. Use transform with the already-fitted scaler;
        # the original called fit_transform, which refits on the new data.
        match_scaled = ss.transform(match_df)
        win_rate = xgb.predict_proba(match_scaled)
        real_win_rate = win_label.mean()
        predict_win_rate = win_rate[:, 1].mean()
        return real_win_rate, predict_win_rate
def predict_xgb(data):
    """Perform prediction using trained model."""
    model = XGBClassifier()
    data = normalizator(prepare_data(data))
    try:
        model.load_model(
            "cotopaxi/identification_models/proto_XGB_20201112.model")
    except ValueError as exc:
        # The original wrote `raise CotopaxiException from exc(...)`, which
        # calls the caught exception instead of chaining it.
        raise CotopaxiException(
            "[!] Cannot load machine learning classifier!"
            " This may be caused by incompatible version of tensorflow"
            " (please install tensorflow version 2.2.0)!") from exc
    result = model.predict(data)
    unique, counts = numpy.unique(result, return_counts=True)
    devices = list(unique)
    result_dict = dict(zip(devices, counts))
    result_dict = sorted(result_dict.items(), key=lambda x: x[1], reverse=True)
    result_class = result_dict[0][0]
    return result_class, result_dict, counts.sum()
def run(self):
    model_folder_path = '../model'
    # Note: the file carries a .pkl extension, but XGBClassifier.load_model
    # expects the native XGBoost format, not a pickle.
    model_path = os.path.join(model_folder_path, 'xgb_final.pkl')
    trained_model = XGBClassifier()
    trained_model.load_model(model_path)
    # read the processed features file
    df_test = pd.read_csv(self.input().path)
    # predict churn
    prediction = trained_model.predict(df_test)
    # put the prediction in a dataframe along with index ids
    # (this is the submission format)
    submission = pd.DataFrame(data=prediction, columns=['churn'])
    submission['churn'] = submission['churn'].map({1: 'yes', 0: 'no'})
    submission.reset_index(inplace=True)
    submission.rename(columns={'index': 'id'}, inplace=True)
    submission['id'] = submission['id'] + 1
    # write submission to file
    submission.to_csv(self.output().path, index=False)
def stroke_predict(gender, age, hypertension, heart_disease, ever_married,
                   work_type, Residence_type, avg_glucose_level, bmi,
                   smoking_status):
    # Build a one-row frame, replicating the encoding used at training time.
    # The categorical inputs arrive as strings and are mapped to integer
    # codes below, so they must not be int-cast here (the original cast
    # everything to int first, which made the later .map() calls produce NaN
    # and the string comparisons always false).
    data = pd.DataFrame({
        'gender': [1 if gender == 'male' else 0],
        'age': [int(age)],
        'hypertension': [int(hypertension)],
        'heart_disease': [int(heart_disease)],
        'ever_married': [1 if ever_married == 'Yes' else 0],
        'work_type': [work_type],
        'Residence_type': [1 if Residence_type == 'Urban' else 0],
        'avg_glucose_level': [float(avg_glucose_level)],
        'bmi': [float(bmi)],
        'smoking_status': [smoking_status],
    })
    work_mapping = {
        'Self_employed': 3,
        'Private': 2,
        'children': 1,
        'Govt_job': 0
    }
    data['work_type'] = data['work_type'].map(work_mapping)
    smoke_mapping = {
        'Unknown': 0,
        'formerly smoked': 1,
        'never_smoked': 2,
        'smokes': 3
    }
    data['smoking_status'] = data['smoking_status'].map(smoke_mapping)

    # after data has been replicated
    xgb = XGBClassifier()
    xgb.load_model("weights/stroke.model")
    return xgb.predict(data)[0]
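A usage sketch, under the assumption that the categorical arguments arrive as the raw strings mapped inside the function; the values are illustrative:

result = stroke_predict(
    gender='male', age=67, hypertension=0, heart_disease=1,
    ever_married='Yes', work_type='Private', Residence_type='Urban',
    avg_glucose_level=228.69, bmi=36.6, smoking_status='formerly smoked')
print(result)  # model's predicted class label for this patient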
def start_pre(val_img_list, val_tar_list, type_class=minor_type_class):
    real_class_pair_list = cut_class_pair
    model_base_path = 'outs/'
    result_list = [list() for i in range(len(val_img_list))]
    config = Config()
    result_max_item_list = [(0, 0) for i in range(len(val_img_list))]
    # Run one binary model per class and collect per-sample votes.
    for ci, class_pair in enumerate(type_class):
        model_path = (model_base_path + 'xgboost_model_per_class'
                      + str(class_pair) + '.pkl')
        print('part ', ci, ' of ', len(real_class_pair_list))
        clr = XGBClassifier()
        clr.load_model(model_path)
        y_p_x = clr.predict_proba(val_img_list)
        pre_for_f1 = []
        t_for_f1 = []
        for i_ys, ys in enumerate(y_p_x):
            if len(val_tar_list) > 0:
                tail = ''
                mid = ''
                if class_pair in val_tar_list[i_ys]:
                    tail = '-----------'
                if ys[1] >= 0.5:
                    mid = '||||||||'
                print('ci ', ci, ' i_ys ', i_ys, ' pre ', ys, mid, ' c ',
                      class_pair, ' t ', val_tar_list[i_ys], tail)
            else:
                print('ci ', ci, ' i_ys ', i_ys, ' pre ', ys, ' c ', class_pair)
            sub_result = result_list[i_ys]
            if ys[1] >= 0.5:
                sub_result.append(class_pair)
                pre_for_f1.append(1)
            else:
                pre_for_f1.append(0)
            result_list[i_ys] = sub_result
            max_item_idx, max_item_f = result_max_item_list[i_ys]
            if ys[1] > max_item_f:
                result_max_item_list[i_ys] = (class_pair, ys[1])
        if len(val_tar_list) > 0:
            for tar in val_tar_list:
                if class_pair in tar:
                    t_for_f1.append(1)
                else:
                    t_for_f1.append(0)
            print('c ', class_pair, '---------f1 ',
                  f1_score(t_for_f1, pre_for_f1, average="macro"))
        # print('sub ', ci, ' r:', sub_result)

    # Aggregate per-class votes into final multi-label predictions.
    pre_list = []
    for this_sub_i, sub_result in enumerate(result_list):
        print('this_sub_i ', this_sub_i, ' sub_result ', sub_result)
        result_i = np.zeros(28)
        for i_s, s in enumerate(sub_result):
            result_i[s] += 1
        # print('result_i ', result_i)
        result = []
        for i, r_i in enumerate(result_i):
            if r_i == 1 and (i in type_class):
                # print('i ', i, ' r_i ', r_i)
                result.append(i)
        if len(val_tar_list) > 0:
            print('pre ', result, ' t ', val_tar_list[this_sub_i])
        pre_list.append(result)
    return pre_list, result_max_item_list
class FeatureClassifier():
    def __init__(self, modelfile='featureclassifier.json'):
        pwd = os.path.dirname(__file__)
        self.model = XGBClassifier()
        self.model.load_model(pwd + '/models/' + modelfile)

    def check_errors(self, sim):
        if sim.N_real < 4:
            raise AttributeError("SPOCK Error: SPOCK only applicable to systems with 3 or more planets")

    def predict_stable(self, sim, n_jobs=-1):
        """
        Predict whether passed simulation will be stable over 10^9 orbits of the innermost planet.

        Parameters:

        sim (rebound.Simulation):   Orbital configuration to test
        n_jobs (int):               Number of cores to use for calculation (only if passing more
                                    than one simulation). Default: Use all available cores.

        Returns:

        float:  Estimated probability of stability. Will return exactly zero if configuration
                goes unstable within first 10^4 orbits.
        """
        res = self.generate_features(sim, n_jobs=n_jobs)
        try:  # many sims: res is a list of (features, stable) pairs
            stable = np.array([r[1] for r in res])
            features = [r[0] for r in res]
            Nsims = len(sim)
        except:  # single sim: len(sim) raises and we unpack directly
            stable = np.array([res[1]])
            features = [res[0]]
            Nsims = 1

        # We take the negligible hit of evaluating XGBoost for all systems, and
        # overwrite prob=0 at the end for ones that went unstable in the short
        # integration. Array of Nsims*Ntriospersim x 10 features for XGBoost.
        featurevals = np.array([[val for val in trio.values()]
                                for system in features for trio in system])
        # Take 2nd column: probability it belongs to the stable class.
        probs = self.model.predict_proba(featurevals)[:, 1]
        # XGBoost evaluated a flattened list of all trios; reshape so that
        # trios in the same sim are grouped.
        trios_per_sim = int(len(probs) / Nsims)
        probs = probs.reshape((Nsims, trios_per_sim))
        # Take the minimum probability of stability within the trios for each simulation.
        probs = np.min(probs, axis=1)
        # Set probabilities for systems that went unstable within the short
        # integration to exactly zero. (The original indexed with ~stable,
        # which misbehaves when stable is an integer array.)
        probs[stable == 0] = 0

        if Nsims == 1:
            return probs[0]
        else:
            return probs

    def generate_features(self, sim, n_jobs=-1):
        """
        Generates the set of summary features used by the feature classifier for prediction.

        Parameters:

        sim (rebound.Simulation):   Orbital configuration to test
        n_jobs (int):               Number of cores to use for calculation (only if passing more
                                    than one simulation). Default: Use all available cores.

        Returns:

        List of OrderedDicts:   A list of sets of features for each adjacent trio of planets
                                in the system. Each set of features is an ordered dictionary
                                of 10 summary features. See paper.
        stable (int):           1 if the N-body integration survived the 10^4 orbits,
                                0 if it went unstable.
        """
        if isinstance(sim, rebound.Simulation):
            sim = [sim]
        args = []
        if len(set([s.N_real for s in sim])) != 1:
            raise ValueError("If running over many sims at once, they must have the same number of particles!")
        for s in sim:
            s = s.copy()
            init_sim_parameters(s)
            minP = np.min([p.P for p in s.particles[1:s.N_real]])
            self.check_errors(s)
            trios = [[j, j+1, j+2] for j in range(1, s.N_real - 2)]  # list of adjacent trios
            featureargs = [10000, 80, trios]
            args.append([s, featureargs])

        def run(params):
            sim, featureargs = params
            triofeatures, stable = features(sim, featureargs)
            return triofeatures, stable

        if len(args) == 1:  # single sim
            res = run(args[0])  # stable will be 0 if an orbit is hyperbolic
        else:
            if n_jobs == -1:
                n_jobs = cpu_count()
            # pool = ThreadPool(n_jobs)
            res = map(run, args)
        return list(res)
    'model__colsample_bylevel': (0.01, 1.0, 'uniform'),
    'model__learning_rate': (0.01, 1.0, 'log-uniform'),
    'model__n_estimators': Integer(60, 400),
    'model__max_depth': Integer(3, 12),
    # 'model__scale_pos_weight': Real(1, 1000, 'log-uniform'),  # binary only
    'model__min_child_weight': Integer(1, 15),
    'model__gamma': Real(0.1, 3),
    'model__alpha': Real(0, 1),
    'model__lambda': Real(0, 1),
    'model__subsample': Real(0.3, 1),
    'model__colsample_bytree': Real(0, 1),
    'model__colsample_bynode': Real(0, 1)
}

xg = XGBClassifier()
xg.load_model('XGBoost_model.json')
xgb_search_prev = {
    'model': [xg],
    # 'model__learning_rate': (0.01, 1.0, 'log-uniform'),
    # 'model__min_child_weight': (0, 10),
    # 'model__max_delta_step': Integer(0, 20),
    # 'model__colsample_bytree': (0.01, 1.0, 'uniform'),
    # 'model__colsample_bylevel': (0.01, 1.0, 'uniform'),
    # 'model__n_estimators': Integer(100, 200),
    # 'model__scale_pos_weight': Real(1, 1000, 'log-uniform'),
    # 'model__min_child_weight': Integer(1, 10),
    # 'model__gamma': Integer(1, 5),
    # 'model__subsample': Real(0.3, 1),
    # 'model__colsample_bytree': Real(0.1, 1),
    # 'model__max_depth': Integer(6, 12)
}
RED = '\u001b[31m'
GREEN = '\u001b[32m'
BLUE = '\u001b[34m'
RESET = '\033[0m'

xgb_mod = XGBClassifier(booster='dart',
                        tree_method="gpu_hist",
                        n_estimators=300,
                        learning_rate=0.05,
                        predictor='gpu_predictor',
                        eval_metric='logloss',
                        max_depth=3,
                        gpu_id=0)
xgb_mod.load_model('CVD_mod')

cvd_df = pd.read_csv('cardio_train.csv', sep=';', index_col=0)
cvd_df['age'] = cvd_df['age'] / 365.24  # convert age from days to years
cvd_df['gender'] = cvd_df['gender'] - 1
# Drop physiologically implausible blood-pressure readings.
cvd_df = cvd_df[(cvd_df['ap_lo'] <= 370) & (cvd_df['ap_lo'] > 0)]
cvd_df = cvd_df[(cvd_df['ap_hi'] <= 370) & (cvd_df['ap_hi'] > 0)]
cvd_df = cvd_df[cvd_df['ap_hi'] >= cvd_df['ap_lo']]
cvd_df.reset_index(drop=True, inplace=True)

# The split call was truncated in the original; the test_size and
# random_state used below are assumptions.
X_train, X_test, y_train, y_test = train_test_split(
    cvd_df.drop(['cardio'], axis=1),
    cvd_df['cardio'],
    test_size=0.2,
    random_state=0)
class XGBoost(BaseAlgorithm):
    def __init__(self, algorithm_settings, problem_type):
        super().__init__(algorithm_settings)
        self.problem_type = problem_type

    def build(self):
        if self.problem_type == SupervisedTask.regression:
            self.build_regression_model()
        elif self.problem_type == SupervisedTask.classification:
            self.build_classification_model()
        else:
            raise TypeError('Unknown problem_type')

    def build_regression_model(self):
        from xgboost import XGBRegressor
        self.model = XGBRegressor(
            max_depth=self.algorithm_settings.max_depth,
            learning_rate=self.algorithm_settings.learning_rate,
            n_estimators=self.algorithm_settings.n_estimators,
            objective=self.algorithm_settings.objective,
            booster=self.algorithm_settings.booster,
            n_jobs=self.algorithm_settings.n_jobs,
            gamma=self.algorithm_settings.gamma,
            min_child_weight=self.algorithm_settings.min_child_weight,
            max_delta_step=self.algorithm_settings.max_delta_step,
            subsample=self.algorithm_settings.subsample,
            reg_alpha=self.algorithm_settings.reg_alpha,
            reg_lambda=self.algorithm_settings.reg_lambda,
            random_state=self.algorithm_settings.random_state)

    def build_classification_model(self):
        from xgboost import XGBClassifier
        self.model = XGBClassifier(
            max_depth=self.algorithm_settings.max_depth,
            learning_rate=self.algorithm_settings.learning_rate,
            n_estimators=self.algorithm_settings.n_estimators,
            objective=self.algorithm_settings.objective,
            booster=self.algorithm_settings.booster,
            n_jobs=self.algorithm_settings.n_jobs,
            gamma=self.algorithm_settings.gamma,
            min_child_weight=self.algorithm_settings.min_child_weight,
            max_delta_step=self.algorithm_settings.max_delta_step,
            subsample=self.algorithm_settings.subsample,
            reg_alpha=self.algorithm_settings.reg_alpha,
            reg_lambda=self.algorithm_settings.reg_lambda,
            random_state=self.algorithm_settings.random_state)

    def train(self, train_x, train_y, settings):
        self.model.fit(train_x, train_y,
                       eval_metric=self.algorithm_settings.eval_metric)
        self.save(settings)

    def evaluate(self, test_x):
        prediction = self.model.predict(test_x)
        prediction = prediction.reshape(-1, 1)
        return prediction

    def load(self, model_path):
        self.model.load_model(fname=model_path)

    def save(self, settings):
        model_save_dir = os.path.join(settings.models_path, 'xgboost_models')
        os.makedirs(model_save_dir, exist_ok=True)
        model_name = self.get_model_name(settings)
        save_path = os.path.join(model_save_dir, model_name)
        self.model.save_model(fname=save_path)
        print(f"Model saved to: {save_path}")

    def get_model_name(self, settings):
        if settings.problem_type == SupervisedTask.regression:
            return 'regression_model.xgb'
        else:
            return 'classification_model.xgb'
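A sketch of how the wrapper above might be driven end to end, assuming algorithm_settings and settings objects that carry the attributes referenced in the class; the variable names are illustrative:

algo = XGBoost(algorithm_settings, SupervisedTask.classification)
algo.build()                            # constructs the underlying XGBClassifier
algo.train(train_x, train_y, settings)  # fits, then saves to settings.models_path
preds = algo.evaluate(test_x)           # (N, 1) array of class predictions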
def get_model():
    xgboost_quora_model = BASE_URL + "/xgboost_xcfl_quora_model.model"
    x_cfl = XGBClassifier()
    x_cfl.load_model(xgboost_quora_model)
    return x_cfl
# Supported eval metrics include rmse, mae, logloss, error (error = 1 - accuracy)
# and auc (a companion metric to accuracy).
results = model.evals_result()
# print("eval's results : ", results)
# print("r2 Score : %.2f%%:" % (r2 * 100.0))

y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)  # (y_true, y_pred) argument order
print("acc : ", acc)

#####################################################################################################
# import pickle  # provided by the Python standard library
# from joblib import dump, load
# import joblib
# pickle.dump(model, open("./model/xgb_save/cancer.pickle.dat", "wb"))  # save in binary (wb) mode
# joblib.dump(model, "./model/xgb_save/cancer.joblib.dat")
model.save_model("./model/xgb_save/cancer.xgb.model")
print("Saved.")

# model2 = pickle.load(open("./model/xgb_save/cancer.pickle.dat", "rb"))
# model2 = joblib.load("./model/xgb_save/cancer.joblib.dat")
model2 = XGBClassifier()
model2.load_model("./model/xgb_save/cancer.xgb.model")
print("Loaded.")

y_pred = model2.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print("acc : ", acc)
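The snippet above retrieves evals_result() but only prints it. A small sketch of plotting the recorded learning curve, assuming the model was fit with a two-set eval_set and eval_metric='logloss' (the 'validation_0'/'validation_1' keys are the names xgboost assigns to the eval sets):

import matplotlib.pyplot as plt

results = model.evals_result()
# evals_result() maps eval-set names to per-round metric histories.
epochs = range(len(results['validation_0']['logloss']))
plt.plot(epochs, results['validation_0']['logloss'], label='train')
plt.plot(epochs, results['validation_1']['logloss'], label='test')
plt.xlabel('boosting round')
plt.ylabel('logloss')
plt.legend()
plt.show()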
'''Provides recommendations for portfolio action, backtests, and
incrementally trains the given XGBoost Classifier Model.'''

from xgboost import XGBClassifier
import cpdb
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from sklearn.metrics import zero_one_loss
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

btc_model = XGBClassifier()
eth_model = XGBClassifier()
# load_model mutates the classifier in place and returns None, so its
# result must not be assigned back (the original did, leaving both
# variables bound to None).
btc_model.load_model('btc_model.bin')
eth_model.load_model('eth_model.bin')

model = {
    'btc': btc_model,
    'eth': eth_model,
}


def get_recommendation(coinName, features):
    '''Delivers a recommendation based on model classification.'''
    coin_model = model.get(coinName)
    recommendation = coin_model.predict(features)
    return recommendation
def is_spam(data, mode=2, classifier='manual'):
    if classifier == 'manual':
        message_body = data
        if mode != 2:
            message_body = get_email(data, mode)
        clean_message = clean_message_no_html(
            message_body, stop_words=set(stopwords.words('english')))
        word_columns_df = pd.DataFrame.from_records([clean_message])
        word_columns_df.index.name = 'DOC_ID'
        word_index = pd.Index(vocab.VOCAB_WORD)
        sparse_matrix = make_sparse_matrix(word_columns_df, word_index).groupby(
            ['DOC_ID', 'WORD_ID']).sum().reset_index().to_numpy()
        full_matrix = make_full_matrix(sparse_matrix, vocab.shape[0]).to_numpy()

        spam_email_prob = PROB_SPAM
        ham_email_prob = 1 - PROB_SPAM
        # Track the last nonzero probabilities so we can back off when the
        # running products underflow to zero. (The original referenced
        # prev_spam/prev_ham before any assignment on the first iteration.)
        prev_spam = spam_email_prob
        prev_ham = ham_email_prob
        # denominator = 1
        for j in range(full_matrix.shape[1]):
            if full_matrix[0, j] > 0:
                if prob_token_spam[j] > 0:
                    spam_email_prob = spam_email_prob * \
                        (prob_token_spam[j] ** full_matrix[0, j])
                    if spam_email_prob == 0:
                        spam_email_prob = prev_spam
                        ham_email_prob = prev_ham
                        break
                if prob_token_ham[j] > 0:
                    ham_email_prob = ham_email_prob * \
                        (prob_token_ham[j] ** full_matrix[0, j])
                    if ham_email_prob == 0:
                        spam_email_prob = prev_spam
                        ham_email_prob = prev_ham
                        break
                prev_spam = spam_email_prob
                prev_ham = ham_email_prob
                # denominator = denominator * prob_all_tokens[j]
        # print(spam_email_prob / denominator > ham_email_prob / denominator)
        print(spam_email_prob > ham_email_prob)
        # joint_log_spam = full_matrix.dot(
        #     np.log(prob_token_spam + 0.000000000000001)
        #     - np.log(prob_all_tokens + 0.000000000000001)) + np.log(PROB_SPAM)
        # print(joint_log_spam)
        # joint_log_ham = full_matrix.dot(
        #     np.log(prob_token_ham + 0.000000000000001)
        #     - np.log(prob_all_tokens + 0.000000000000001)) + np.log(1 - PROB_SPAM)
        # print(joint_log_ham)
    elif classifier == 'xgb':
        xgb_classifier = XGBClassifier()
        xgb_classifier.load_model('./XGB.model')
        data_list = []
        data_list.append(data)
        doc_term_matrix = vectorizer.transform(data_list)
        print(xgb_classifier.predict(doc_term_matrix)[0] == 1)
class Classifier:
    # For initializing train and test sets, classifier and accuracy score.
    # Change tree_method to gpu_hist if you want xgboost to run on a GPU.
    def __init__(self, params={
            'objective': 'reg:squarederror',
            'verbosity': 0
    }):
        self.X_train = []
        self.X_labels = []
        self.test = []
        self.test_labels = []
        self.model = XGBClassifier(**params)
        self.prediction = 0
        self.error = 0

    def size(self):
        if isinstance(self.X_train, np.ndarray):
            return self.X_train.size
        return len(self.X_train)

    # adding the data points
    def input_train(self, features, feature):
        if isinstance(self.X_train, np.ndarray) and self.X_train.size > 0:
            self.X_train = self.X_train.tolist()
            self.X_labels = self.X_labels.tolist()
        self.X_train.append(features)
        self.X_labels.append(feature)

    # train the data
    def train(self):
        self.X_train = np.asarray(self.X_train)
        self.X_labels = np.asarray(self.X_labels)
        self.model.fit(self.X_train, self.X_labels)

    def train_eval(self, metric='error'):
        self.X_train = np.asarray(self.X_train)
        self.X_labels = np.asarray(self.X_labels)
        X_train, X_test, y_train, y_test = train_test_split(self.X_train,
                                                            self.X_labels,
                                                            test_size=0.33)
        self.model.fit(X_train, y_train,
                       eval_set=[(X_train, y_train), (X_test, y_test)],
                       eval_metric=metric)
        evals_result = self.model.evals_result()
        if metric == 'error':
            validations = []
            for val in evals_result.values():
                lst = val.get("error")
                validations.append(sum(lst) / len(lst))
            return 1 - (sum(validations) / len(validations))
        else:
            validations = []
            for val in evals_result.values():
                lst = val.get(metric)
                validations.append(lst[-1])
            return validations

    # input test labels if you want to check accuracy
    def label(self, label):
        self.test_labels.append(label)

    def input_test(self, features):
        if isinstance(self.test, np.ndarray) and self.test.size > 0:
            self.test = self.test.tolist()
        self.test.append(features)

    # test data
    def predict(self):
        if not isinstance(self.test, np.ndarray):
            self.test = np.asarray(self.test)
        self.prediction = self.model.predict(self.test)
        return self.prediction

    def predict_proba(self):
        if not isinstance(self.test, np.ndarray):
            self.test = np.asarray(self.test)
        self.prediction = self.model.predict_proba(self.test)
        return self.prediction

    # if you have the test labels you can check the error rate (you want error close to 0)
    def check_error(self):
        self.test_labels = np.asarray(self.test_labels)
        self.error = metrics.mean_absolute_error(self.test_labels,
                                                 self.prediction)
        return self.error

    # save classifier
    def save_classifier(self, file):
        self.model.save_model(file)

    # open saved classifier
    def open_classifier(self, file):
        self.model.load_model(file)

    # removes all training data
    def clean_train(self):
        self.X_train = []
        self.X_labels = []

    # removes all testing data
    def clean_test(self):
        self.test = []
        self.test_labels = []
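A quick usage sketch of the wrapper above on a toy dataset; the feature vectors and labels are illustrative:

clf = Classifier()
clf.input_train([0.2, 1.5], 0)   # (features, label) pairs
clf.input_train([1.7, 0.3], 1)
clf.input_train([0.1, 1.2], 0)
clf.input_train([1.9, 0.4], 1)
clf.train()

clf.input_test([0.3, 1.4])
clf.label(0)
print(clf.predict())        # predicted labels for the queued test points
print(clf.check_error())    # mean absolute error against the supplied labels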
def predict_sent(vecs, xgb_model_analyze):
    xgb = XGBC()
    xgb.load_model(xgb_model_analyze)
    # w2v_model is assumed to be a module-level global here.
    pred = predict(vecs, w2v_model, xgb, 300)
    df = pd.DataFrame(pred, columns=['sent'])
    return df