def deserialize_mlp(model_dict):
    """Rebuild an ``MLPClassifier`` from its dictionary representation.

    Args:
        model_dict: Mapping holding the constructor arguments under
            ``'params'`` plus the fitted attributes read below
            (``'coefs_'``, ``'intercepts_'``, ``'loss_'``, ``'n_iter_'``,
            ``'n_layers_'``, ``'n_outputs_'``, ``'out_activation_'``,
            ``'_label_binarizer'`` and ``'classes_'``).

    Returns:
        An ``MLPClassifier`` with those attributes restored.

    Raises:
        KeyError: If one of the expected keys is missing.
    """
    model = MLPClassifier(**model_dict['params'])

    # Layers generally differ in shape, so wrapping the whole collection in
    # a single np.array() builds a ragged array (a ValueError on
    # NumPy >= 1.24).  Convert layer by layer and keep a plain list, which
    # is the form scikit-learn itself stores after fitting.
    model.coefs_ = [np.array(layer) for layer in model_dict['coefs_']]
    model.loss_ = model_dict['loss_']
    model.intercepts_ = [
        np.array(layer) for layer in model_dict['intercepts_']]
    model.n_iter_ = model_dict['n_iter_']
    model.n_layers_ = model_dict['n_layers_']
    model.n_outputs_ = model_dict['n_outputs_']
    model.out_activation_ = model_dict['out_activation_']
    model._label_binarizer = deserialize_label_binarizer(
        model_dict['_label_binarizer'])

    model.classes_ = np.array(model_dict['classes_'])

    return model
# Example #2
0
    def deserialize_model(path):
        """Deserialize JSON object storing the ml model.
        Model (an MLPClassifier from sklearn) is re-instantiated
        with proper values.
        INPUT:
        --path: filepath for loading the JSON object
        OUTPUT:
        --model: Returns an MLPClassifier (sklearn) object, with an extra
          ``features`` attribute holding the list stored under 'features'
        RAISES:
        --KeyError: if the JSON object lacks one of the expected keys
        """
        def deserialize_label_binarizer(label_binarizer_dict):
            """Rebuild a LabelBinarizer from its dictionary form."""
            label_binarizer = LabelBinarizer()
            label_binarizer.neg_label = label_binarizer_dict['neg_label']
            label_binarizer.pos_label = label_binarizer_dict['pos_label']
            label_binarizer.sparse_output = label_binarizer_dict[
                'sparse_output']
            label_binarizer.y_type_ = label_binarizer_dict['y_type_']
            label_binarizer.sparse_input_ = label_binarizer_dict[
                'sparse_input_']
            label_binarizer.classes_ = np.array(
                label_binarizer_dict['classes_'])

            return label_binarizer

        # Load (or deserialize) model from JSON
        with open(path, 'r') as in_file:
            model_dict = json.load(in_file)

        model = MLPClassifier(**model_dict['params'])

        # Restore weights and biases one layer at a time as numeric arrays.
        # Wrapping the whole collection in np.array(..., dtype=object) gives
        # a result whose shape depends on whether the layers happen to share
        # dimensions, and can leave object-dtype entries behind; a list of
        # per-layer arrays is the form scikit-learn itself stores.
        model.coefs_ = [np.array(layer) for layer in model_dict['coefs_']]
        model.loss_ = model_dict['loss_']
        model.intercepts_ = [
            np.array(layer) for layer in model_dict['intercepts_']]
        model.n_iter_ = model_dict['n_iter_']
        model.n_layers_ = model_dict['n_layers_']
        model.n_outputs_ = model_dict['n_outputs_']
        model.out_activation_ = model_dict['out_activation_']
        model._label_binarizer = deserialize_label_binarizer(
            model_dict['_label_binarizer'])
        model.features = list(model_dict['features'])

        model.classes_ = np.array(model_dict['classes_'])
        return model
def test_serialize_model():
    """Round-trip a hand-populated MLPClassifier through JSON.

    The classifier is never fitted; its fitted attributes are assigned
    directly, then it is written with ``serialize_model`` and read back
    with ``deserialize_model``.  The reloaded model must agree with the
    original on features, hidden-layer count, label type and the number
    of coefficient/intercept entries.
    """
    footprint = HostFootprint()
    classifier = MLPClassifier()

    # Hand-built binarizer standing in for a fitted one.
    binarizer = LabelBinarizer()
    binarizer_state = (
        ('neg_label', 0),
        ('pos_label', 1),
        ('sparse_output', False),
        ('y_type_', "binary"),
        ('sparse_input_', False),
        ('classes_', np.array([0])),
    )
    for attr_name, attr_value in binarizer_state:
        setattr(binarizer, attr_name, attr_value)

    # The search object is constructed but deliberately left unused.
    param_grid = {'hidden_layer_sizes': [(64, 32)]}
    GridSearchCV(
        classifier, param_grid, cv=5, n_jobs=-1, scoring='f1_weighted')

    # Fake the attributes a real fit would have produced.
    classifier.coefs_ = np.array([[1], [2]])
    classifier.loss_ = 42
    classifier.intercepts_ = np.array([[3], [4]])
    classifier.classes_ = np.array([[5], [6]])
    classifier.n_iter_ = 42
    classifier.n_layers_ = 2
    classifier.n_outputs_ = 1
    classifier.out_activation_ = "logistic"
    classifier._label_binarizer = binarizer
    classifier.features = ['test_1', 'test_2', 'test_3']

    with tempfile.TemporaryDirectory() as workdir:
        json_path = os.path.join(workdir, 'host_footprint.json')
        footprint.serialize_model(classifier, json_path)
        restored = footprint.deserialize_model(json_path)

        assert classifier.features == restored.features

        print(f"model params: {classifier.get_params()}")
        print(f"new_model params: {restored.get_params()}")
        original_layers = classifier.get_params()['hidden_layer_sizes']
        restored_layers = restored.get_params()['hidden_layer_sizes']
        assert len(original_layers) == len(restored_layers)

        original_y_type = classifier._label_binarizer.y_type_
        restored_y_type = restored._label_binarizer.y_type_
        assert original_y_type == restored_y_type

        assert len(classifier.coefs_) == len(restored.coefs_)
        assert len(classifier.intercepts_) == len(restored.intercepts_)