    def test_model_isolation(self):
        # Test that we can correctly load two different models with the same entrypoint in the same process
        with TemporaryDirectory() as test_dir1:
            with TemporaryDirectory() as test_dir2:
                path1 = self.package_dummy_model(test_dir1, 1.0)
                path2 = self.package_dummy_model(test_dir2, 2.0)

                with load_neuropod(path1, _always_use_native=False) as n1:
                    with load_neuropod(path2, _always_use_native=False) as n2:
                        input_data = {"x": np.array([0], dtype=np.float32)}
                        self.assertEqual(n1.infer(input_data)["out"][0], 1.0)
                        self.assertEqual(n2.infer(input_data)["out"][0], 2.0)
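The `package_dummy_model` helper is defined elsewhere in this suite. Judging by the assertions (an input of `[0]` yields the packaged constant), the packaged entrypoint behaves roughly like the hypothetical sketch below; the names and the entrypoint shape are assumptions, not the actual packaging code:

import numpy as np

def make_dummy_entrypoint(constant):
    # Hypothetical stand-in for the packaged model: with x = [0], "out"
    # equals the packaged constant, which is what the isolation test asserts.
    def model(x):
        return {"out": np.asarray(x, dtype=np.float32) + constant}
    return model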
Example #2
    def test_python_deps(self):
        # Test that we can correctly load two different models with the same dependencies
        with TemporaryDirectory() as test_dir1:
            with TemporaryDirectory() as test_dir2:
                path1 = self.package_sklearn_model(test_dir1)
                path2 = self.package_sklearn_model(test_dir2)

                with load_neuropod(path1) as n1:
                    with load_neuropod(path2) as n2:
                        input_data = {
                            "x": np.array([[4, 5]], dtype=np.float64)
                        }
                        self.assertAlmostEqual(
                            n1.infer(input_data)["out"][0], 17)
                        self.assertAlmostEqual(
                            n2.infer(input_data)["out"][0], 17)
Example #3
def load_and_test_neuropod(neuropod_path,
                           test_input_data,
                           test_expected_out=None,
                           neuropod_load_args=None,
                           **kwargs):
    """
    Loads a neuropod in a new process and verifies that inference runs.
    If expected output is specified, the output of the model is checked against
    the expected values.

    Raises a ValueError if the outputs don't match the expected values
    """
    # Avoid a shared mutable default for the load args
    if neuropod_load_args is None:
        neuropod_load_args = {}

    if RUN_NATIVE_TESTS:
        # Load the model using native out-of-process execution
        model = load_neuropod(neuropod_path, **neuropod_load_args)
        out = model.infer(test_input_data)
    else:
        # Run the evaluation in a new process. This is important to make sure
        # custom ops are being tested correctly
        args = neuropod_load_args.copy()

        # load_neuropod defaults to the native bindings; disable them here so
        # the model runs through the pure Python backend in the new process
        args["_always_use_native"] = False

        out = eval_in_new_process(neuropod_path,
                                  test_input_data,
                                  neuropod_load_args=args)

    # Check the output
    if test_expected_out is not None:
        # Throws a ValueError if the output doesn't match the expected value
        check_output_matches_expected(out, test_expected_out)
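A typical call against the function above, assuming a packaged addition model exists at the given (illustrative) path:

import numpy as np

# Verifies that inference runs and that "out" matches x + y;
# a mismatch raises a ValueError via check_output_matches_expected.
load_and_test_neuropod(
    "/tmp/addition_model_neuropod",  # illustrative path
    test_input_data={
        "x": np.array([1.0, 2.0], dtype=np.float32),
        "y": np.array([3.0, 4.0], dtype=np.float32),
    },
    test_expected_out={"out": np.array([4.0, 6.0], dtype=np.float32)},
)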
Example #4
    def test_invalid_shape(self):
        with six.assertRaisesRegex(
            self,
            (ValueError, RuntimeError),
            "in the input spec is expected to have 2 dimensions, but had 1",
        ):
            neuropod = load_neuropod(TestSpecValidation.neuropod_path)
            neuropod.infer({"in_float32_matrix": np.asarray([3], dtype=np.float32)})
Example #5
    def test_invalid_input_name(self):
        with six.assertRaisesRegex(
            self, (ValueError, RuntimeError), "are not found in the input spec"
        ):
            neuropod = load_neuropod(TestSpecValidation.neuropod_path)
            neuropod.infer(
                {"bogus": np.asarray([[1.1, 2.2], [0, 1], [2, 3]], dtype=np.float32)}
            )
Example #6
    def test_no_inputs(self):
        neuropod = load_neuropod(TestSpecValidation.neuropod_path)
        result = neuropod.infer({})
        self.assertGreater(result["out_string_vector"].shape[0], 0)
        self.assertEqual(
            result["out_string_vector"].shape[0], result["out_int_matrix"].shape[0]
        )
        self.assertGreater(result["out_float_matrix"].shape[0], 0)
Example #7
    def test_some_inputs(self):
        neuropod = load_neuropod(TestSpecValidation.neuropod_path)
        result = neuropod.infer(
            {
                "in_float32_matrix": np.asarray(
                    [[1.1, 2.2], [0, 1], [2, 3]], dtype=np.float32
                )
            }
        )
        self.assertGreater(result["out_string_vector"].shape[0], 0)
Example #8
def check_strings_model(neuropod_path):
    """
    Validate that the inputs and outputs of the loaded neuropod match
    the problem spec
    """
    with load_neuropod(neuropod_path, _always_use_native=False) as neuropod:
        target = get_string_concat_model_spec()

        # Validate that the specs match
        check_specs_match(neuropod.inputs, target["input_spec"])
        check_specs_match(neuropod.outputs, target["output_spec"])
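`get_string_concat_model_spec` and `check_specs_match` are helpers defined elsewhere. The spec dict they exchange follows neuropod's usual tensor-spec format (a list of name/dtype/shape entries); a plausible reconstruction for a string-concatenation model, with the exact entries being assumptions:

def get_string_concat_model_spec():
    # Hypothetical reconstruction: two string inputs, one concatenated output.
    return {
        "input_spec": [
            {"name": "x", "dtype": "string", "shape": ("batch_size",)},
            {"name": "y", "dtype": "string", "shape": ("batch_size",)},
        ],
        "output_spec": [
            {"name": "out", "dtype": "string", "shape": ("batch_size",)},
        ],
    }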
Example #9
    def test_stateful_model(self):
        # `init_op` can be passed as a single string or a list of strings
        for init_op_name_as_list in [False, True]:
            with TemporaryDirectory() as test_dir:
                neuropod_path = os.path.join(test_dir, "test_neuropod")
                self.package_accumulator_model(neuropod_path, init_op_name_as_list)
                neuropod_obj = load_neuropod(neuropod_path)
                np.testing.assert_equal(neuropod_obj.name, "accumulator_model")
                np.testing.assert_equal(neuropod_obj.platform, "tensorflow")
                np.testing.assert_equal(
                    neuropod_obj.infer({"x": np.float32(2.0)}), {"out": 2.0})
                np.testing.assert_equal(
                    neuropod_obj.infer({"x": np.float32(4.0)}), {"out": 6.0})
Example #10
def check_addition_model(neuropod_path):
    """
    Validate that the inputs and outputs of the loaded neuropod match
    the problem spec
    """
    with load_neuropod(neuropod_path) as neuropod:
        target = get_addition_model_spec()

        # Validate that the specs match
        check_specs_match(neuropod.inputs, target["input_spec"])
        check_specs_match(neuropod.outputs, target["output_spec"])
        expected_name = "addition_model"
        if neuropod.name != expected_name:
            raise ValueError("Expected model name '{}'. Got '{}'".format(
                expected_name, neuropod.name))

        if not neuropod.platform:
            raise ValueError("Expected the platform field to be set")
Example #11
def t_neuropod(csv_filename):
    #######
    # Setup
    #######
    dir_path = os.path.dirname(csv_filename)
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')
    audio_dest_folder = os.path.join(os.getcwd(), 'generated_audio')

    input_features = [
        binary_feature(),
        numerical_feature(),
        category_feature(vocab_size=3),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
        vector_feature(),
        image_feature(image_dest_folder),
        audio_feature(audio_dest_folder),
        timeseries_feature(),
        date_feature(),
        h3_feature(),
        set_feature(vocab_size=3),
        bag_feature(vocab_size=3),
    ]

    output_features = [
        binary_feature(),
        numerical_feature(),
        category_feature(vocab_size=3),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
        set_feature(vocab_size=3),
        vector_feature()
    ]

    # Generate test data
    data_csv_path = generate_data(input_features, output_features,
                                  csv_filename)

    #############
    # Train model
    #############
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {
            'epochs': 2
        }
    }
    ludwig_model = LudwigModel(model_definition)
    ludwig_model.train(
        data_csv=data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )
    original_predictions_df = ludwig_model.predict(data_csv=data_csv_path)

    ###################
    # save Ludwig model
    ###################
    ludwigmodel_path = os.path.join(dir_path, 'ludwigmodel')
    shutil.rmtree(ludwigmodel_path, ignore_errors=True)
    ludwig_model.save(ludwigmodel_path)

    ################
    # build neuropod
    ################
    neuropod_path = os.path.join(dir_path, 'neuropod')
    export_neuropod(ludwigmodel_path, neuropod_path=neuropod_path)

    ########################
    # predict using neuropod
    ########################
    data_df = pd.read_csv(data_csv_path)
    if_dict = {
        input_feature['name']: np.expand_dims(
            np.array([str(x) for x in data_df[input_feature['name']].tolist()],
                     dtype='str'), 1)
        for input_feature in input_features
    }

    from neuropod.loader import load_neuropod
    neuropod_model = load_neuropod(neuropod_path)
    preds = neuropod_model.infer(if_dict)

    for key in preds:
        preds[key] = np.squeeze(preds[key])

    #########
    # cleanup
    #########
    # Delete the temporary data created
    for path in [
            ludwigmodel_path, neuropod_path, image_dest_folder,
            audio_dest_folder
    ]:
        if os.path.exists(path):
            if os.path.isfile(path):
                os.remove(path)
            else:
                shutil.rmtree(path, ignore_errors=True)

    ########
    # checks
    ########
    for output_feature in output_features:
        output_feature_name = output_feature['name']
        output_feature_type = output_feature['type']

        if (output_feature_name + "_predictions" in preds
                and output_feature_name + "_predictions"
                in original_predictions_df):
            neuropod_pred = preds[output_feature_name +
                                  "_predictions"].tolist()
            if output_feature_type == BINARY:
                neuropod_pred = list(map(lambda x: str2bool(x), neuropod_pred))
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_pred = list(map(lambda x: x.split(), neuropod_pred))

            original_pred = original_predictions_df[output_feature_name +
                                                    "_predictions"].tolist()

            assert neuropod_pred == original_pred

        if (output_feature_name + "_probability" in preds
                and output_feature_name + "_probability"
                in original_predictions_df):
            neuropod_prob = preds[output_feature_name +
                                  "_probability"].tolist()
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_prob = list(
                    map(lambda x: [float(n) for n in x.split()],
                        neuropod_prob))
            if any(isinstance(el, list) for el in neuropod_prob):
                neuropod_prob = np.array(
                    list(itertools.zip_longest(*neuropod_prob, fillvalue=0))).T

            original_prob = original_predictions_df[output_feature_name +
                                                    "_probability"].tolist()
            if any(isinstance(el, list) for el in original_prob):
                original_prob = np.array(
                    list(itertools.zip_longest(*original_prob, fillvalue=0))).T

            assert np.isclose(neuropod_prob, original_prob).all()

        if (output_feature_name + "_probabilities" in preds
                and output_feature_name + "_probabilities"
                in original_predictions_df):
            neuropod_prob = preds[output_feature_name +
                                  "_probabilities"].tolist()

            original_prob = original_predictions_df[output_feature_name +
                                                    "_probabilities"].tolist()

            assert np.isclose(neuropod_prob, original_prob).all()
Example #12
def test_neuropod(csv_filename):
    #######
    # Setup
    #######
    with tempfile.TemporaryDirectory() as tmpdir:
        dir_path = tmpdir
        data_csv_path = os.path.join(tmpdir, csv_filename)
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        audio_dest_folder = os.path.join(tmpdir, "generated_audio")

        input_features = [
            binary_feature(),
            numerical_feature(),
            category_feature(vocab_size=3),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            vector_feature(),
            image_feature(image_dest_folder),
            audio_feature(audio_dest_folder),
            timeseries_feature(),
            date_feature(),
            h3_feature(),
            set_feature(vocab_size=3),
            bag_feature(vocab_size=3),
        ]

        output_features = [
            binary_feature(),
            numerical_feature(),
            category_feature(vocab_size=3),
            sequence_feature(vocab_size=3),
            text_feature(vocab_size=3),
            set_feature(vocab_size=3),
            vector_feature(),
        ]

        # Generate test data
        data_csv_path = generate_data(input_features, output_features, data_csv_path)

        #############
        # Train model
        #############
        config = {"input_features": input_features, "output_features": output_features, "training": {"epochs": 2}}
        ludwig_model = LudwigModel(config, backend=LocalTestBackend())
        ludwig_model.train(
            dataset=data_csv_path,
            skip_save_training_description=True,
            skip_save_training_statistics=True,
            skip_save_progress=True,
            skip_save_log=True,
            skip_save_processed_input=True,
            output_directory=dir_path,
        )

        data_df = pd.read_csv(data_csv_path)
        original_predictions_df, _ = ludwig_model.predict(dataset=data_df)

        ###################
        # save Ludwig model
        ###################
        ludwigmodel_path = os.path.join(dir_path, "ludwigmodel")
        shutil.rmtree(ludwigmodel_path, ignore_errors=True)
        ludwig_model.save(ludwigmodel_path)

        ################
        # build neuropod
        ################
        neuropod_path = os.path.join(dir_path, "neuropod")
        shutil.rmtree(neuropod_path, ignore_errors=True)
        export_neuropod(ludwigmodel_path, neuropod_path=neuropod_path, entrypoint="get_test_model")

        ########################
        # predict using neuropod
        ########################
        if_dict = {
            input_feature["name"]: np.expand_dims(
                np.array([str(x) for x in data_df[input_feature["name"]].tolist()], dtype="str"), 1
            )
            for input_feature in input_features
        }

        from neuropod.loader import load_neuropod

        neuropod_model = load_neuropod(neuropod_path, _always_use_native=False)
        preds = neuropod_model.infer(if_dict)

        for key in preds:
            preds[key] = np.squeeze(preds[key])

        #########
        # cleanup
        #########
        # Delete the temporary data created
        for path in [ludwigmodel_path, neuropod_path, image_dest_folder, audio_dest_folder]:
            if os.path.exists(path):
                if os.path.isfile(path):
                    os.remove(path)
                else:
                    shutil.rmtree(path, ignore_errors=True)

        ########
        # checks
        ########
        for output_feature in output_features:
            output_feature_name = output_feature["name"]
            output_feature_type = output_feature["type"]

            if (
                output_feature_name + "_predictions" in preds
                and output_feature_name + "_predictions" in original_predictions_df
            ):
                neuropod_pred = preds[output_feature_name + "_predictions"].tolist()
                if output_feature_type == BINARY:
                    neuropod_pred = [str2bool(x) for x in neuropod_pred]
                if output_feature_type in {SEQUENCE, TEXT, SET}:
                    neuropod_pred = [x.split() for x in neuropod_pred]

                original_pred = original_predictions_df[output_feature_name + "_predictions"].tolist()

                assert neuropod_pred == original_pred

            if (
                output_feature_name + "_probability" in preds
                and output_feature_name + "_probability" in original_predictions_df
            ):
                neuropod_prob = preds[output_feature_name + "_probability"].tolist()
                if output_feature_type in {SEQUENCE, TEXT, SET}:
                    neuropod_prob = [[float(n) for n in x.split()] for x in neuropod_prob]
                if any(isinstance(el, list) for el in neuropod_prob):
                    neuropod_prob = np.array(list(itertools.zip_longest(*neuropod_prob, fillvalue=0))).T

                original_prob = original_predictions_df[output_feature_name + "_probability"].tolist()
                if any(isinstance(el, list) for el in original_prob):
                    original_prob = np.array(list(itertools.zip_longest(*original_prob, fillvalue=0))).T

                assert np.allclose(neuropod_prob, original_prob)

            if (
                output_feature_name + "_probabilities" in preds
                and output_feature_name + "_probabilities" in original_predictions_df
            ):
                neuropod_prob = preds[output_feature_name + "_probabilities"].tolist()

                original_prob = original_predictions_df[output_feature_name + "_probabilities"].tolist()

                assert np.allclose(neuropod_prob, original_prob)
Example #13
def test_neuropod(csv_filename):
    #######
    # Setup
    #######
    dir_path = os.path.dirname(csv_filename)

    # Single sequence input, multiple outputs
    sf = sequence_feature()
    input_features = [sf]
    input_feature_name = input_features[0]['name']

    output_features = [
        binary_feature(),
        numerical_feature(),
        category_feature(vocab_size=3),
        sequence_feature(vocab_size=3),
        text_feature(vocab_size=3),
        set_feature(vocab_size=3),
        vector_feature()
    ]

    # Generate test data
    data_csv_path = generate_data(input_features, output_features,
                                  csv_filename)

    #############
    # Train model
    #############
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {
            'epochs': 2
        }
    }
    ludwig_model = LudwigModel(model_definition)
    ludwig_model.train(
        data_csv=data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )
    original_predictions_df = ludwig_model.predict(data_csv=data_csv_path)

    ###################
    # save Ludwig model
    ###################
    ludwigmodel_path = os.path.join(dir_path, 'ludwigmodel')
    shutil.rmtree(ludwigmodel_path, ignore_errors=True)
    ludwig_model.save(ludwigmodel_path)

    ################
    # build neuropod
    ################
    neuropod_path = os.path.join(dir_path, 'neuropod')
    export_neuropod(ludwigmodel_path, neuropod_path=neuropod_path)

    ########################
    # predict using neuropod
    ########################
    data_df = pd.read_csv(data_csv_path)
    if_vals = data_df[input_feature_name].tolist()

    from neuropod.loader import load_neuropod
    neuropod_model = load_neuropod(neuropod_path)
    preds = neuropod_model.infer(
        {input_feature_name: np.array(if_vals, dtype='str')})

    #########
    # cleanup
    #########
    for path in [ludwigmodel_path, neuropod_path]:
        if os.path.exists(path):
            if os.path.isfile(path):
                os.remove(path)
            else:
                shutil.rmtree(path, ignore_errors=True)

    ########
    # checks
    ########
    for output_feature in output_features:
        output_feature_name = output_feature['name']
        output_feature_type = output_feature['type']

        if (output_feature_name + "_predictions" in preds
                and output_feature_name + "_predictions"
                in original_predictions_df):
            neuropod_pred = preds[output_feature_name +
                                  "_predictions"].tolist()
            if output_feature_type == BINARY:
                neuropod_pred = list(map(lambda x: str2bool(x), neuropod_pred))
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_pred = list(map(lambda x: x.split(), neuropod_pred))

            original_pred = original_predictions_df[output_feature_name +
                                                    "_predictions"].tolist()

            assert neuropod_pred == original_pred

        if (output_feature_name + "_probability" in preds
                and output_feature_name + "_probability"
                in original_predictions_df):
            neuropod_prob = preds[output_feature_name +
                                  "_probability"].tolist()
            if output_feature_type in {SEQUENCE, TEXT, SET}:
                neuropod_prob = list(
                    map(lambda x: [float(n) for n in x.split()],
                        neuropod_prob))
            if any(isinstance(el, list) for el in neuropod_prob):
                neuropod_prob = np.array(
                    list(itertools.zip_longest(*neuropod_prob, fillvalue=0))).T

            original_prob = original_predictions_df[output_feature_name +
                                                    "_probability"].tolist()
            if any(isinstance(el, list) for el in original_prob):
                original_prob = np.array(
                    list(itertools.zip_longest(*original_prob, fillvalue=0))).T

            assert np.isclose(neuropod_prob, original_prob).all()

        if (output_feature_name + "_probabilities" in preds
                and output_feature_name + "_probabilities"
                in original_predictions_df):
            neuropod_prob = preds[output_feature_name +
                                  "_probabilities"].tolist()

            original_prob = original_predictions_df[output_feature_name +
                                                    "_probabilities"].tolist()

            assert np.isclose(neuropod_prob, original_prob).all()
Example #14
def test_neuropod_torchscript(csv_filename, tmpdir):
    data_csv_path = os.path.join(tmpdir, csv_filename)

    # Configure features to be tested:
    bin_str_feature = binary_feature()
    input_features = [
        bin_str_feature,
        # binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
        # TODO: future support
        # sequence_feature(vocab_size=3),
        # text_feature(vocab_size=3),
        # vector_feature(),
        # image_feature(image_dest_folder),
        # audio_feature(audio_dest_folder),
        # timeseries_feature(),
        # date_feature(),
        # h3_feature(),
        # set_feature(vocab_size=3),
        # bag_feature(vocab_size=3),
    ]
    output_features = [
        bin_str_feature,
        # binary_feature(),
        number_feature(),
        category_feature(vocab_size=3),
        # TODO: future support
        # sequence_feature(vocab_size=3),
        # text_feature(vocab_size=3),
        # set_feature(vocab_size=3),
        # vector_feature()
    ]
    backend = LocalTestBackend()
    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {
            "epochs": 2
        }
    }

    # Generate training data
    training_data_csv_path = generate_data(input_features, output_features,
                                           data_csv_path)

    # Convert bool values to strings, e.g., {'Yes', 'No'}
    df = pd.read_csv(training_data_csv_path)
    false_value, true_value = "No", "Yes"
    df[bin_str_feature[NAME]] = df[bin_str_feature[NAME]].map(
        lambda x: true_value if x else false_value)
    df.to_csv(training_data_csv_path)

    # Train Ludwig (Pythonic) model:
    ludwig_model = LudwigModel(config, backend=backend)
    ludwig_model.train(
        dataset=training_data_csv_path,
        skip_save_training_description=True,
        skip_save_training_statistics=True,
        skip_save_model=True,
        skip_save_progress=True,
        skip_save_log=True,
        skip_save_processed_input=True,
    )

    # Obtain predictions from Python model
    preds_dict, _ = ludwig_model.predict(dataset=training_data_csv_path,
                                         return_type=dict)

    # Create a graph-based inference model (TorchScript) from the trained Ludwig model.
    neuropod_path = os.path.join(tmpdir, "neuropod")
    export_neuropod(ludwig_model, neuropod_path)

    from neuropod.loader import load_neuropod

    neuropod_module = load_neuropod(neuropod_path)

    # Neuropod inputs are numpy arrays: strings for object columns,
    # float32 for everything else
    def to_input(s: pd.Series) -> np.ndarray:
        if s.dtype == "object":
            return np.array(s.to_list())
        return s.to_numpy().astype(np.float32)

    df = pd.read_csv(training_data_csv_path)
    inputs = {
        name: to_input(df[feature.column])
        for name, feature in ludwig_model.model.input_features.items()
    }
    outputs = neuropod_module.infer(inputs)

    # Compare results from Python trained model against Neuropod
    assert len(preds_dict) == len(outputs)
    for feature_name, feature_outputs_expected in preds_dict.items():
        assert feature_name in outputs

        output_values_expected = feature_outputs_expected[PREDICTIONS]
        output_values = outputs[feature_name]
        if output_values.dtype.type in {np.string_, np.str_}:
            # Strings should match exactly
            assert np.all(
                output_values == output_values_expected
            ), f"feature: {feature_name}, output: predictions"
        else:
            assert np.allclose(
                output_values, output_values_expected
            ), f"feature: {feature_name}, output: predictions"