예제 #1
0
 def __init__(self, scaler_model, clf_model, hmm_model):
     """Store the supplied models together with a fixed-edge ordinal discretizer.

     The discretizer is not fitted on data: its fitted attributes are
     assigned by hand so that it bins a single feature using the edges
     ``[-inf, 0.1, 0.3, 0.5, 0.7, 0.9, inf]``.

     Parameters
     ----------
     scaler_model : object
         Stored unchanged as ``self.scaler_model_``.
     clf_model : object
         Stored unchanged as ``self.clf_model_``.
     hmm_model : object
         Stored unchanged as ``self.hmm_model_``.
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)

     # NOTE(review): the edges look like probability thresholds, presumably
     # applied to classifier output -- confirm against the transform code.
     prob_bins = np.array([-np.inf, 0.1, 0.3, 0.5, 0.7, 0.9, np.inf])

     bins_discretizer = KBinsDiscretizer(encode='ordinal')
     # One feature, one row of edges.
     bins_discretizer.bin_edges_ = prob_bins.reshape(1, -1)
     # k + 1 edges delimit k bins, so the bin count is one less than the
     # number of edges (previously the edge count itself was stored).
     bins_discretizer.n_bins_ = np.array([prob_bins.shape[0] - 1])

     self.scaler_model_ = scaler_model
     self.clf_model_ = clf_model
     self.hmm_model_ = hmm_model
     self.bins_discretizer_ = bins_discretizer
예제 #2
0
    def load_from_json(self, fname):
        """Restore a trained model from a JSON file read with ``json_tricks``.

        Rebuilds, from the decoded data:

        * ``self.clf`` -- a ``ModifiedNB`` whose fitted attributes
          (``class_count_``, ``class_log_prior_``, ``classes_``,
          ``feature_count_``, ``feature_log_prob_``) are copied over;
        * ``self.v_fps`` -- a ``DictVectorizer`` whose vocabulary keys are
          converted back to ``int`` (JSON object keys are always strings);
        * ``self.kbd`` -- a ``KBinsDiscretizer`` for the continuous
          variables, restored on a best-effort basis: if the ``binariser``
          entry is missing or cannot be rebuilt, ``self.kbd`` is left
          untouched and the reason is logged;
        * the extra parameters ``con_desc_list``, ``fp_type``,
          ``fp_radius`` and ``informative_cvb``; ``self.trained`` is set
          to ``True``.

        Parameters
        ----------
        fname : str
            Path of the JSON file to load.

        Raises
        ------
        KeyError
            If a required (non-binariser) entry is missing from the file.
        """
        import logging

        logger = logging.getLogger(__name__)

        # Use a context manager so the file handle is closed even when
        # decoding fails (the handle used to be leaked).
        with open(fname) as json_file:
            import_data = json_tricks.load(json_file)

        # load the model
        import_clf = ModifiedNB()
        import_clf.class_count_ = import_data['class_count_']
        import_clf.class_log_prior_ = import_data['class_log_prior_']
        import_clf.classes_ = import_data['classes_']
        import_clf.feature_count_ = import_data['feature_count_']
        import_clf.feature_log_prob_ = import_data['feature_log_prob_']
        self.clf = import_clf

        # load the fps dict vectoriser
        v_fps = DictVectorizer()
        dv = import_data['fps_vectoriser']
        v_fps.vocabulary_ = {int(k): v for k, v in dv['vocabulary_'].items()}
        v_fps.feature_names_ = dv['feature_names_']
        self.v_fps = v_fps

        # load the continuous variables binariser (optional, best effort)
        try:
            binariser = import_data['binariser']
            kbd = KBinsDiscretizer(n_bins=10,
                                   encode='onehot',
                                   strategy='quantile')
            kbd.n_bins = binariser['n_bins']
            kbd.n_bins_ = binariser['n_bins_']
            kbd.bin_edges_ = np.asarray(
                [np.asarray(x) for x in binariser['bin_edges_']])
            encoder = OneHotEncoder()
            encoder.categories = binariser['categories']
            encoder._legacy_mode = False
            kbd._encoder = encoder
            # Assigned last so a partial failure never leaves a
            # half-initialised discretizer on the instance.
            self.kbd = kbd
        except KeyError as e:
            # A missing entry just means there is nothing to restore.
            logger.debug("No binariser restored from %s (missing key %s)",
                         fname, e)
        except Exception as e:
            # Still best-effort, but no longer silent.
            logger.warning("Could not restore the binariser from %s: %r",
                           fname, e)

        # extra parameters
        self.trained = True
        self.con_desc_list = import_data['con_desc_list']
        self.fp_type = import_data['fp_type']
        self.fp_radius = import_data['fp_radius']
        self.informative_cvb = import_data['informative_cvb']
예제 #3
0
def discretize(X, max_bins):
    """Bin every feature of ``X`` and return the dense one-hot encoding.

    A ``KBinsDiscretizer`` with ``n_bins=max_bins`` and
    ``encode='onehot-dense'`` is fitted on ``X``. Repeated edges in each
    feature's fitted bin edges are then collapsed before ``X`` is
    transformed.

    Parameters
    ----------
    X : array-like
        Data passed to ``KBinsDiscretizer.fit`` and ``transform``.
    max_bins : int
        Value forwarded as ``n_bins`` to the discretizer.

    Returns
    -------
    The result of ``KBinsDiscretizer.transform(X)``.
    """
    discretizer = KBinsDiscretizer(n_bins=max_bins, encode='onehot-dense')
    discretizer.fit(X)

    # np.unique returns the sorted distinct values, so duplicate edges
    # (zero-width bins) are merged while the edge order is kept.
    deduplicated_edges = []
    for feature_edges in discretizer.bin_edges_:
        deduplicated_edges.append(np.unique(feature_edges))
    discretizer.bin_edges_ = deduplicated_edges

    return discretizer.transform(X)