def encoder_for(field):
    """Pick the column transformer for a single field description.

    Fields whose ``'optype'`` is ``'categorical'`` get a ``OneHotEncoder``
    whose ``categories_`` is filled in by hand from ``self.field_mapping``
    (no ``fit`` call is made). Every other field, including one without an
    ``'optype'`` entry, yields the string ``'passthrough'``.

    NOTE(review): ``self`` is not a parameter; it is resolved from an
    enclosing scope that is not visible in this block.
    """
    if field.get('optype') == 'categorical':
        one_hot = OneHotEncoder()
        # Entry [1] of the mapping value carries the category list.
        # presumably a pandas-style categorical dtype; verify against caller
        mapped = self.field_mapping[field.get('name')][1]
        one_hot.categories_ = np.array([mapped.categories])
        # Private scikit-learn flag, set directly because fit() is skipped.
        one_hot._legacy_mode = False
        return one_hot

    return 'passthrough'
def load_from_json(self, fname):
    """Restore a trained model and its preprocessing state from a JSON file.

    The file is parsed with ``json_tricks.load`` and the result is used to
    rebuild, without refitting:

    * ``self.clf`` -- a ``ModifiedNB`` with its learned arrays restored;
    * ``self.v_fps`` -- a ``DictVectorizer`` with its vocabulary restored;
    * ``self.kbd`` -- a ``KBinsDiscretizer`` (best effort, see below);
    * ``self.trained`` (set to ``True``), ``self.con_desc_list``,
      ``self.fp_type``, ``self.fp_radius`` and ``self.informative_cvb``.

    Args:
        fname: Path of the JSON file to read.

    Raises:
        OSError: If ``fname`` cannot be opened.
        KeyError: If a required top-level entry is missing (assuming the
            parsed document is a dict). The ``'binariser'`` entry is
            optional and never raises.
    """
    import logging

    # Use a context manager so the file handle is closed even when parsing
    # fails; the original passed an anonymous open() and never closed it.
    with open(fname) as fh:
        import_data = json_tricks.load(fh)

    # load the model
    import_clf = ModifiedNB()
    import_clf.class_count_ = import_data['class_count_']
    import_clf.class_log_prior_ = import_data['class_log_prior_']
    import_clf.classes_ = import_data['classes_']
    import_clf.feature_count_ = import_data['feature_count_']
    import_clf.feature_log_prob_ = import_data['feature_log_prob_']
    self.clf = import_clf

    # load the fps dict vectoriser
    v_fps = DictVectorizer()
    dv = import_data['fps_vectoriser']
    # JSON object keys are always strings; convert them back to ints.
    v_fps.vocabulary_ = {int(k): v for k, v in dv['vocabulary_'].items()}
    v_fps.feature_names_ = dv['feature_names_']
    self.v_fps = v_fps

    # load the continuous variables binariser (optional: on any failure
    # self.kbd is left exactly as it was before this call)
    try:
        binariser = import_data['binariser']
        kbd = KBinsDiscretizer(n_bins=10, encode='onehot', strategy='quantile')
        kbd.n_bins = binariser['n_bins']
        kbd.n_bins_ = binariser['n_bins_']
        kbd.bin_edges_ = np.asarray(
            [np.asarray(x) for x in binariser['bin_edges_']])
        encoder = OneHotEncoder()
        # NOTE(review): this sets the constructor parameter `categories`,
        # not the fitted attribute `categories_` -- confirm that
        # kbd.transform works with an encoder restored this way.
        encoder.categories = binariser['categories']
        encoder._legacy_mode = False
        kbd._encoder = encoder
        self.kbd = kbd
    except Exception:
        # Deliberately best-effort, but record why the binariser was
        # skipped instead of discarding the error silently.
        logging.getLogger(__name__).debug(
            "Binariser not restored from %s; self.kbd left unchanged",
            fname, exc_info=True)

    # extra parameters
    self.trained = True
    self.con_desc_list = import_data['con_desc_list']
    self.fp_type = import_data['fp_type']
    self.fp_radius = import_data['fp_radius']
    self.informative_cvb = import_data['informative_cvb']