예제 #1
0
    def test_iris_one_student(self):
        """Check an Oracle with a single student survives persist/load.

        Builds an ``Oracle`` from a ``RuleModel`` teacher and one
        ``RandomForestModeler`` student, predicts on the test split, then
        persists the oracle into a temporary model repository, reloads it
        into a second ``Oracle`` and verifies both produce identical
        predictions.
        """
        data = self.load_data()
        oracle = Oracle(teacher=RuleModel(),
                        student_modelers=[RandomForestModeler()])
        oracle.build(data['training_data'])

        test_data = data['test_data']
        pred = oracle.predict(test_data)['predictions']
        assert len(pred) == len(test_data['y'])
        # At least one prediction is expected to differ from the labels.
        assert any(pred != test_data['y'])

        repo_var = 'H1ST_MODEL_REPO_PATH'
        # Remember the caller's setting so the test does not leave the
        # process environment pointing at a deleted temporary directory.
        previous_repo = os.environ.get(repo_var)
        with tempfile.TemporaryDirectory() as path:
            os.environ[repo_var] = path
            try:
                version = oracle.persist()

                oracle_2 = Oracle(teacher=RuleModel(),
                                  student_modelers=[RandomForestModeler()])
                oracle_2.load_params(version)

                assert 'sklearn' in str(
                    type(oracle_2.students[0].base_model))
                pred_2 = oracle_2.predict(test_data)['predictions']
                pred_df = pd.concat((pred, pred_2), axis=1)
                pred_df.columns = ['predictions', 'pred_2']
                mismatches = pred_df[
                    pred_df['predictions'] != pred_df['pred_2']]
                assert len(mismatches) == 0
            finally:
                # Restore (or remove) the variable even if an assert fails.
                if previous_repo is None:
                    os.environ.pop(repo_var, None)
                else:
                    os.environ[repo_var] = previous_repo
예제 #2
0
    def test_rule_based_ensemble_one_equipment(self):
        """Check a rule-based-ensemble time-series oracle round trip.

        Drops the ``machineID`` column from the training features, builds an
        oracle through ``TimeseriesOracleModeler`` (random-forest and
        AdaBoost students, rule-based ensembler), then persists it to a
        temporary model repository, reloads it into a ``TimeSeriesOracle``
        and verifies the reloaded model yields the same predictions.
        """
        data = self.load_data()
        data['training_data']['X'].drop('machineID', axis=1, inplace=True)
        oracle_modeler = TimeseriesOracleModeler(
            teacher=RuleModel(),
            student_modelers=[RandomForestModeler(),
                              AdaBoostModeler()],
            ensembler_modeler=RuleBasedModeler(
                model_class=RuleBasedClassificationModel))
        oracle = oracle_modeler.build_model(
            {'unlabeled_data': data['training_data']['X']}, ts_col='date')

        pred = oracle.predict(data['training_data'])['predictions']
        assert len(pred) == 2

        repo_var = 'H1ST_MODEL_REPO_PATH'
        # Remember the caller's setting so the test does not leave the
        # process environment pointing at a deleted temporary directory.
        previous_repo = os.environ.get(repo_var)
        with tempfile.TemporaryDirectory() as path:
            os.environ[repo_var] = path
            try:
                version = oracle.persist()

                oracle_2 = TimeSeriesOracle(
                    teacher=RuleModel(),
                    students=[RandomForestModel(),
                              AdaBoostModel()],
                    ensembler=RuleBasedClassificationModel())
                oracle_2.load_params(version)

                assert 'sklearn' in str(
                    type(oracle_2.students[0].base_model))
                pred_2 = oracle_2.predict(
                    data['training_data'])['predictions']
                pred_df = pd.DataFrame({'pred': pred, 'pred_2': pred_2})
                mismatches = pred_df[pred_df['pred'] != pred_df['pred_2']]
                assert len(mismatches) == 0
            finally:
                # Restore (or remove) the variable even if an assert fails.
                if previous_repo is None:
                    os.environ.pop(repo_var, None)
                else:
                    os.environ[repo_var] = previous_repo
예제 #3
0
 def __init__(self, teacher: PredictiveModel,
              ensembler_modeler: Modeler,
              student_modelers: List = None,
              model_class=Oracle
              ):
     """Store the components used to build an oracle model.

     Args:
         teacher: The teacher model.
         ensembler_modeler: Modeler used for the ensembler.
         student_modelers: Modelers used for the students. When omitted
             (``None``), a fresh ``[RandomForestModeler(),
             AdaBoostModeler()]`` list is created for this instance.
         model_class: Class of the model to build; defaults to ``Oracle``.
     """
     if student_modelers is None:
         # Create the defaults per call: a list literal in the signature
         # is evaluated once and would be shared by every instance.
         student_modelers = [RandomForestModeler(), AdaBoostModeler()]
     self.teacher = teacher
     self.student_modelers = student_modelers
     self.ensembler_modeler = ensembler_modeler
     self.model_class = model_class
     self.stats = {}
예제 #4
0
    def test_ml_based_ensemble(self):
        """Check an ML-ensemble time-series oracle round trip.

        Builds an oracle through ``TimeseriesOracleModeler`` with
        random-forest and AdaBoost students and ``MyMLModeler`` as the
        ensembler, passing both unlabeled and labeled data. It then persists
        the oracle to a temporary model repository, reloads it into a
        ``TimeSeriesOracle`` and verifies the reloaded model yields the same
        predictions.
        """
        data = self.load_data()

        oracle_modeler = TimeseriesOracleModeler(
            teacher=RuleModel(),
            student_modelers=[RandomForestModeler(),
                              AdaBoostModeler()],
            ensembler_modeler=MyMLModeler())

        num_samples = 2
        features = data['training_data']['X']
        oracle = oracle_modeler.build_model(
            {
                'unlabeled_data': features,
                'labeled_data': {
                    'X_train': features,
                    'y_train': np.arange(num_samples),
                    'X_test': features,
                    'y_test': np.arange(num_samples)
                }
            },
            id_col='machineID',
            ts_col='date')

        pred = oracle.predict(data['training_data'])['predictions']
        assert len(pred) == num_samples

        repo_var = 'H1ST_MODEL_REPO_PATH'
        # Remember the caller's setting so the test does not leave the
        # process environment pointing at a deleted temporary directory.
        previous_repo = os.environ.get(repo_var)
        with tempfile.TemporaryDirectory() as path:
            os.environ[repo_var] = path
            try:
                version = oracle.persist()

                oracle_2 = TimeSeriesOracle(
                    teacher=RuleModel(),
                    students=[RandomForestModel(),
                              AdaBoostModel()],
                    ensembler=MyMLModel())
                oracle_2.load_params(version)

                assert 'sklearn' in str(
                    type(oracle_2.students[0].base_model))
                pred_2 = oracle_2.predict(
                    data['training_data'])['predictions']
                pred_df = pd.DataFrame({'pred': pred, 'pred_2': pred_2})
                mismatches = pred_df[pred_df['pred'] != pred_df['pred_2']]
                assert len(mismatches) == 0
            finally:
                # Restore (or remove) the variable even if an assert fails.
                if previous_repo is None:
                    os.environ.pop(repo_var, None)
                else:
                    os.environ[repo_var] = previous_repo