Example #1
0
    def test_regression_model_with_constant_variation(self):
        """Check that initial residuals reproduce the estimation outcome.

        The test has three phases:

        1. Estimate the model and run it on the same dataset; the run result
           must equal the original ``outcome`` values.
        2. Change the first two values of ``attr1`` and run again; only those
           two elements of the result may differ from the original outcome.
        3. Set two outcome values to 0, drop the stored
           ``_init_error_outcome`` attribute and run with
           ``exclude_missing_values_from_initial_error`` enabled; the initial
           error must be zero for exactly those two elements and must not be
           zero across the remaining ones.
        """
        storage = StorageFactory().get_storage('dict_storage')

        table_name = 'dataset_table'
        data = {
            "attr1": array([30, 0, 90, 100, 65, 50]),
            "attr2": array([2002, 1968, 1880, 1921, 1956, 1989]),
            "attr3": array([0.5, 0.1, 0.3, 0.9, 0.2, 0.8]),
            "outcome": array([20, 40, 15, 5, 40, 30], dtype="int32"),
            "id": array([1, 2, 3, 4, 5, 6]),
        }
        storage.write_table(table_name=table_name, table_data=data)
        dataset = Dataset(in_storage=storage,
                          in_table_name=table_name,
                          id_name="id")

        specification = EquationSpecification(
            variables=("attr1", "attr2", "attr3", "constant"),
            coefficients=("b1", "b2", "b3", "constant"))

        model = RegressionModelWithAdditionInitialResiduals(
            outcome_attribute="outcome")
        # Only the coefficients are needed; the second return value is unused.
        coef, _ = model.estimate(
            specification,
            dataset,
            outcome_attribute="outcome",
            procedure="opus_core.estimate_linear_regression")
        result = model.run(specification, coef, dataset)

        # If estimated and run on the same data, it should give the original
        # outcome.
        self.assertTrue(ma.allequal(result, data["outcome"]))

        # If some values changed, the result shouldn't be the same for those
        # elements, while the untouched elements must still match.
        dataset.set_values_of_one_attribute("attr1", array([32, 10]),
                                            arange(2))
        result2 = model.run(specification, coef, dataset)
        self.assertFalse(ma.allequal(result2[0:2], data["outcome"][0:2]))
        self.assertTrue(ma.allequal(result2[2:], data["outcome"][2:]))

        # Check if exclusion of missing values is working.
        excluded = array([2, 4])
        # Complement of `excluded`; the two index sets must not overlap,
        # otherwise the "not all zero" check below is diluted by an element
        # that is required to be zero.
        retained = array([0, 1, 3, 5])
        dataset.set_values_of_one_attribute("outcome", array([0, 0]),
                                            excluded)
        # Drop the stored initial error so that the next run has to provide
        # it again (it is read back from the dataset right after the run).
        dataset.delete_one_attribute("_init_error_outcome")
        model.run(specification,
                  coef,
                  dataset,
                  run_config=Configuration(
                      {'exclude_missing_values_from_initial_error': True}))
        initial_error = dataset.get_attribute("_init_error_outcome")
        self.assertTrue(ma.allequal(initial_error[excluded], 0))
        self.assertFalse(ma.allequal(initial_error[retained], 0))
    def test_regression_model_with_constant_variation(self):
        """Check that initial residuals reproduce the estimation outcome.

        The test has three phases:

        1. Estimate the model and run it on the same dataset; the run result
           must equal the original ``outcome`` values.
        2. Change the first two values of ``attr1`` and run again; only those
           two elements of the result may differ from the original outcome.
        3. Set two outcome values to 0, drop the stored
           ``_init_error_outcome`` attribute and run with
           ``exclude_missing_values_from_initial_error`` enabled; the initial
           error must be zero for exactly those two elements and must not be
           zero across the remaining ones.
        """
        storage = StorageFactory().get_storage('dict_storage')

        table_name = 'dataset_table'
        data = {
            "attr1": array([30, 0, 90, 100, 65, 50]),
            "attr2": array([2002, 1968, 1880, 1921, 1956, 1989]),
            "attr3": array([0.5, 0.1, 0.3, 0.9, 0.2, 0.8]),
            "outcome": array([20, 40, 15, 5, 40, 30], dtype="int32"),
            "id": array([1, 2, 3, 4, 5, 6]),
        }
        storage.write_table(table_name=table_name, table_data=data)
        dataset = Dataset(in_storage=storage, in_table_name=table_name,
                          id_name="id")

        specification = EquationSpecification(
            variables=("attr1", "attr2", "attr3", "constant"),
            coefficients=("b1", "b2", "b3", "constant"))

        model = RegressionModelWithAdditionInitialResiduals(
            outcome_attribute="outcome")
        # Only the coefficients are needed; the second return value is unused.
        coef, _ = model.estimate(
            specification, dataset, outcome_attribute="outcome",
            procedure="opus_core.estimate_linear_regression")
        result = model.run(specification, coef, dataset)

        # If estimated and run on the same data, it should give the original
        # outcome.
        self.assertTrue(ma.allequal(result, data["outcome"]))

        # If some values changed, the result shouldn't be the same for those
        # elements, while the untouched elements must still match.
        dataset.set_values_of_one_attribute("attr1", array([32, 10]),
                                            arange(2))
        result2 = model.run(specification, coef, dataset)
        self.assertFalse(ma.allequal(result2[0:2], data["outcome"][0:2]))
        self.assertTrue(ma.allequal(result2[2:], data["outcome"][2:]))

        # Check if exclusion of missing values is working.
        excluded = array([2, 4])
        # Complement of `excluded`; the two index sets must not overlap,
        # otherwise the "not all zero" check below is diluted by an element
        # that is required to be zero.
        retained = array([0, 1, 3, 5])
        dataset.set_values_of_one_attribute("outcome", array([0, 0]),
                                            excluded)
        # Drop the stored initial error so that the next run has to provide
        # it again (it is read back from the dataset right after the run).
        dataset.delete_one_attribute("_init_error_outcome")
        model.run(specification, coef, dataset,
                  run_config=Configuration(
                      {'exclude_missing_values_from_initial_error': True}))
        initial_error = dataset.get_attribute("_init_error_outcome")
        self.assertTrue(ma.allequal(initial_error[excluded], 0))
        self.assertFalse(ma.allequal(initial_error[retained], 0))