Esempio n. 1
0
    def encoder_for(field):
      """Pick the column transformer for one field description.

      Fields whose ``optype`` is anything other than ``'categorical'`` get
      the string ``'passthrough'``. Categorical fields get a
      ``OneHotEncoder`` whose ``categories_`` attribute is assigned by hand
      from ``self.field_mapping`` (``self`` comes from the enclosing scope),
      so no ``fit`` call happens here.
      """
      if field.get('optype') == 'categorical':
        one_hot = OneHotEncoder()
        # Entry [1] of the mapping is presumably the field's type object
        # exposing ``.categories`` -- verify where field_mapping is built.
        known_levels = self.field_mapping[field.get('name')][1].categories
        one_hot.categories_ = np.array([known_levels])
        # Private scikit-learn flag; presumably set manually because fit()
        # is skipped -- confirm against the installed scikit-learn version.
        one_hot._legacy_mode = False
        return one_hot

      return 'passthrough'
Esempio n. 2
0
    def load_from_json(self, fname):
        """Restore a trained model from a JSON file read with json_tricks.

        Rebuilds the classifier (``self.clf``), the fingerprint
        ``DictVectorizer`` (``self.v_fps``) and, when the file contains a
        usable ``'binariser'`` entry, the ``KBinsDiscretizer``
        (``self.kbd``). Afterwards the remaining settings are copied onto
        ``self`` and ``self.trained`` is set to ``True``.

        Args:
            fname: Path of the JSON file to read.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If a required entry (a classifier attribute,
                ``'fps_vectoriser'`` or one of the extra parameters) is
                missing from the loaded data.
        """
        # Imported locally so this method does not rely on module-level imports.
        import logging

        # Context manager guarantees the handle is closed even if parsing fails.
        with open(fname) as handle:
            import_data = json_tricks.load(handle)

        # load the model
        import_clf = ModifiedNB()
        import_clf.class_count_ = import_data['class_count_']
        import_clf.class_log_prior_ = import_data['class_log_prior_']
        import_clf.classes_ = import_data['classes_']
        import_clf.feature_count_ = import_data['feature_count_']
        import_clf.feature_log_prob_ = import_data['feature_log_prob_']
        self.clf = import_clf

        # load the fps dict vectoriser
        v_fps = DictVectorizer()
        dv = import_data['fps_vectoriser']
        # Keys are cast back to int, presumably because JSON object keys
        # are always stored as strings.
        v_fps.vocabulary_ = {int(k): v for k, v in dv['vocabulary_'].items()}
        v_fps.feature_names_ = dv['feature_names_']
        self.v_fps = v_fps

        # load the continuous variables binariser; the entry is optional,
        # so its absence is not an error and self.kbd is simply left alone.
        try:
            binariser = import_data['binariser']
        except KeyError:
            binariser = None

        if binariser is not None:
            try:
                kbd = KBinsDiscretizer(n_bins=10,
                                       encode='onehot',
                                       strategy='quantile')
                kbd.n_bins = binariser['n_bins']
                kbd.n_bins_ = binariser['n_bins_']
                kbd.bin_edges_ = np.asarray(
                    [np.asarray(x) for x in binariser['bin_edges_']])
                encoder = OneHotEncoder()
                # NOTE(review): only the ``categories`` hyper-parameter is
                # restored here, not a fitted ``categories_`` attribute --
                # confirm transform() works with the installed scikit-learn.
                encoder.categories = binariser['categories']
                encoder._legacy_mode = False
                kbd._encoder = encoder
            except (KeyError, TypeError, ValueError) as err:
                # Still best-effort (loading continues without self.kbd),
                # but a malformed entry is now reported instead of hidden.
                logging.getLogger(__name__).warning(
                    "Could not restore the binariser from %s: %r", fname, err)
            else:
                self.kbd = kbd

        # extra parameters
        self.trained = True
        self.con_desc_list = import_data['con_desc_list']
        self.fp_type = import_data['fp_type']
        self.fp_radius = import_data['fp_radius']
        self.informative_cvb = import_data['informative_cvb']