def deserialize_model(path):
    """
    Rebuild an MLPClassifier (sklearn) from a JSON file.

    The JSON object provides the constructor arguments under 'params' plus
    the stored attributes, which are copied back onto a freshly constructed
    classifier.

    INPUT:
    --path: filepath of the JSON object to load

    OUTPUT:
    --model: the re-instantiated MLPClassifier (sklearn) object
    """

    def deserialize_label_binarizer(label_binarizer_dict):
        # Restore a LabelBinarizer from its dictionary form.
        binarizer = LabelBinarizer()
        # These entries are copied over unchanged, in this order.
        for attribute in (
            'neg_label',
            'pos_label',
            'sparse_output',
            'y_type_',
            'sparse_input_',
        ):
            setattr(binarizer, attribute, label_binarizer_dict[attribute])
        # classes_ is the only entry wrapped in a numpy array.
        binarizer.classes_ = np.array(label_binarizer_dict['classes_'])
        return binarizer

    # Read the serialized model back from disk.
    with open(path, 'r') as json_file:
        stored = json.load(json_file)

    classifier = MLPClassifier(**stored['params'])

    # Weights and intercepts are loaded as object arrays; the remaining
    # entries are assigned as stored.
    classifier.coefs_ = np.array(stored['coefs_'], dtype=object)
    classifier.loss_ = stored['loss_']
    classifier.intercepts_ = np.array(stored['intercepts_'], dtype=object)
    for attribute in ('n_iter_', 'n_layers_', 'n_outputs_', 'out_activation_'):
        setattr(classifier, attribute, stored[attribute])
    classifier._label_binarizer = deserialize_label_binarizer(
        stored['_label_binarizer'])
    classifier.features = list(stored['features'])
    classifier.classes_ = np.array(stored['classes_'])

    # Re-wrap every coefs_ entry with np.array. This is a workaround kept
    # from the original code, which attributes it to a bug in sklearn_json.
    for position, layer in enumerate(classifier.coefs_):
        classifier.coefs_[position] = np.array(layer)

    return classifier
def test_serialize_model():
    """
    Round-trip a hand-populated MLPClassifier through serialize_model and
    deserialize_model, then compare a few attributes of the two models.
    """
    instance = HostFootprint()
    model = MLPClassifier()

    # Label binarizer with hand-set attributes.
    binarizer = LabelBinarizer()
    binarizer_state = {
        'neg_label': 0,
        'pos_label': 1,
        'sparse_output': False,
        'y_type_': "binary",
        'sparse_input_': False,
        'classes_': np.array([0]),
    }
    for attribute, value in binarizer_state.items():
        setattr(binarizer, attribute, value)

    # The grid search object is constructed but never used afterwards.
    search_space = {'hidden_layer_sizes': [(64, 32)]}
    GridSearchCV(model, search_space, cv=5, n_jobs=-1, scoring='f1_weighted')

    # Attributes set by hand on the model instead of calling fit().
    model_state = {
        'coefs_': np.array([[1], [2]]),
        'loss_': 42,
        'intercepts_': np.array([[3], [4]]),
        'classes_': np.array([[5], [6]]),
        'n_iter_': 42,
        'n_layers_': 2,
        'n_outputs_': 1,
        'out_activation_': "logistic",
        '_label_binarizer': binarizer,
        'features': ['test_1', 'test_2', 'test_3'],
    }
    for attribute, value in model_state.items():
        setattr(model, attribute, value)

    with tempfile.TemporaryDirectory() as scratch_dir:
        json_path = os.path.join(scratch_dir, 'host_footprint.json')
        instance.serialize_model(model, json_path)
        new_model = instance.deserialize_model(json_path)

        assert model.features == new_model.features

        print(f"model params: {model.get_params()}")
        print(f"new_model params: {new_model.get_params()}")

        expected_layers = model.get_params()['hidden_layer_sizes']
        restored_layers = new_model.get_params()['hidden_layer_sizes']
        assert len(expected_layers) == len(restored_layers)

        assert model._label_binarizer.y_type_ == new_model._label_binarizer.y_type_
        assert len(model.coefs_) == len(new_model.coefs_)
        assert len(model.intercepts_) == len(new_model.intercepts_)