# Example 1
    def setUp(self) -> None:
        """Create a fresh AI fixture and a ten-row sample dataframe."""
        self.arti = AI('test', 'test2')

        columns = {
            'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
            'col2': ['train', 'eval', 'train', 'eval', 'eval',
                     'train1', 'eval', 'train', 'train', 'train1'],
        }
        self.dataframe = pd.DataFrame(data=columns)
# Example 2
    def test_constructor_build(self):
        """Build a model from a CSV-backed DataBuilder and verify the frame columns."""
        builder = DataBuilder(
            data_source='C:/python/practice2/AIBuilder/tests/data/test_data.csv',
            target_column='target_1',
            data_columns=['feature_1', 'feature_2', 'feature_3'])

        model = AI(project_name='name', log_dir='path/to/dir')
        builder.validate()
        builder.build(ml_model=model)

        expected_columns = ['feature_1', 'feature_2', 'feature_3', 'target_1']
        self.validate_data_frame(model.training_data, expected_columns)
# Example 3
    def create_AI(self, builders: list, ai_name: str = None) -> AbstractAI:
        """Assemble an AI model by running every builder against it in order.

        :param builders: builder instances to run; validated before any runs.
        :param ai_name: optional explicit model name (None lets AI pick one).
        :return: the built model, carrying a description entry per builder.
        """
        self.validate_builders(builders)
        ml_model = AI(self.project_name, self.log_dir, ai_name)
        # Notify listeners (e.g. caching hooks) before any builder runs.
        self.dispatcher.dispatch(
            ModelEvent(KernelDispatcher.PRE_RUN_BUILDERS, ml_model))
        # Remember the model's name now: loading from cache below may clobber it.
        name = ml_model.get_name()

        description = {}
        for builder in builders:
            self.console_printer.line('running: ' + builder.__class__.__name__)
            if isinstance(builder, EstimatorBuilder):
                # overwrite old name from being loaded from cache.
                ml_model.set_name(name)

            result = builder.build(ml_model)
            if result is not None:
                # Result will be None if cache prevents execution, keep using the old model as it has the description,
                # which is required for determining function cache key.
                # todo: fix this, we should not be passing a model just because the cache needs the description.
                ml_model = result

            # Accumulate each builder's self-description; reassign every
            # iteration so a builder-returned replacement model gets it too.
            description[builder.builder_type] = builder.describe()
            ml_model.description = description

        return ml_model
# Example 4
    def run(self):
        """Drive the full test session: build, train, evaluate each candidate
        model produced by the factory, then optionally run predictions.

        Models whose description hash already appears in the report are
        skipped (their builder permutation is popped) so no duplicate work
        is done.
        """
        self.model = None
        pre_run_event = TesterEvent(event_name=KernelDispatcher.PRE_RUN,
                                    session=self.session,
                                    tester=self.tester)
        self.dispatcher.dispatch(pre_run_event)
        while self.factory.has_next_ai():
            #todo: move the new is unique logic somewhere else and changing sequence of operation seems to difficult, smell?
            # Hash the would-be model's description without building it, so a
            # duplicate can be rejected before any expensive work happens.
            final_description = self.preview_model_description()
            final_description_hash = AITester.stable_hash_description(
                self.preview_model_description())
            log_dir_path = self.factory.log_dir
            if not AITester.is_hash_unique_to_report(
                    log_dir_path=log_dir_path,
                    description_hash=final_description_hash):
                # Duplicate: discard this permutation and report via a stub
                # model that only carries the name, log dir and description.
                self.factory.builder_permutations.pop()
                #todo: some kind of null model, as it is just used to pervey name and log dir?
                model = AI(self.factory.project_name, self.factory.log_dir,
                           self.factory.project_name + '_X')
                model.description = final_description
                self.tester.ml_model = model
                self.ModelNotUnique()
                continue

            self.doCreateModel()

            if self.train:
                self.doTrainModel()

            self.all_models.append(self.model)
            if self.evaluate:
                self.doEvaluateModel()

        # Prediction runs once, after every candidate model is processed.
        if self.predict:
            self.prediction_results = self.doPredict()

        post_run_event = TesterEvent(event_name=KernelDispatcher.POST_RUN,
                                     session=self.session,
                                     tester=self.tester)
        self.dispatcher.dispatch(post_run_event)
    def setUp(self):
        """Wire a mocked-out AI model into the strategy under test."""
        strategy_cls = self.get_strategy_class_name()

        self.ml_model = AI(project_name='test',
                           log_dir='test/dir',
                           name='test')

        # Stub every collaborator the strategy might touch.
        self.ml_model.optimizer = mock.Mock()
        self.ml_model.training_data = mock.Mock()
        self.ml_model.training_data.get_tf_feature_columns = mock.Mock()

        params = {'ml_model': self.ml_model, **self.additional_parameters()}
        self.strategy = strategy_cls(**params)
# Example 6
    def setUp(self):
        """Create an AI fixture whose training and evaluation data wrap
        independent copies of the same three-row frame.

        Bug fix: the original `df2 = df = pd.DataFrame(data_array)` bound BOTH
        names to the same object, so the training and evaluation DataModels
        shared one frame and a mutation through either would leak into the
        other. Each model now gets its own copy.
        """
        data_array = {'feat_A': [1, 2, 3], 'feat_B': [8, 6, 4], 'target': [9, 8, 7]}
        df = pd.DataFrame(data_array)
        df2 = df.copy()  # independent frame for the evaluation model

        train_model = DataModel(df)
        train_model.set_target_column('target')
        train_model.set_feature_columns(['feat_A', 'feat_B'])

        eval_model = DataModel(df2)
        eval_model.set_target_column('target')
        eval_model.set_feature_columns(['feat_A', 'feat_B'])

        self.arti = AI('test', 'test/test')
        self.arti.training_data = train_model
        self.arti.evaluation_data = eval_model
 def setUp(self):
     """Provide the minimal AI instance the tests in this case need."""
     self.ml_model = AI(
         project_name='test', log_dir='test/dir', name='test')
# Example 8
class TestCategoricalDataSplitter(unittest.TestCase):
    """Tests for CategoricalDataSplitter: splitting one dataframe into a
    training set and an evaluation set based on the categories in a column.

    Fixture layout (see setUp): rows whose col2 is 'train' or 'train1' belong
    to training (col1 values 1, 3, 6, 8, 9, 0); rows whose col2 is 'eval'
    belong to evaluation (col1 values 2, 4, 5, 7).
    """

    # Expected col1 values after the split. The original assertions listed
    # [1, 2, 3, 6, 8, 9, 0] for training, but 2 is an evaluation row's value:
    # the membership check was over-permissive, so it is tightened here.
    TRAINING_VALUES = [1, 3, 6, 8, 9, 0]
    EVALUATION_VALUES = [2, 4, 5, 7]

    def setUp(self) -> None:
        self.arti = AI('test', 'test2')

        data = {'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 0],
                'col2': ['train', 'eval', 'train', 'eval', 'eval', 'train1', 'eval', 'train', 'train', 'train1']}

        self.dataframe = pd.DataFrame(data=data)

    def _assert_standard_split(self):
        """Assert that self.arti holds 6 training rows (col2 contains
        'train') and 4 evaluation rows (col2 == 'eval') with the expected
        col1 values. Shared by every successful-split test below.
        """
        split_training_data = self.arti.get_training_data().get_dataframe()
        split_evaluation_data = self.arti.get_evaluation_data().get_dataframe()

        self.assertEqual(6, len(split_training_data))
        for item in split_training_data.values:
            # substring check deliberately matches both 'train' and 'train1'
            self.assertTrue('train' in item[1])
            self.assertTrue(item[0] in self.TRAINING_VALUES, f' item {item[0]} not found.')

        self.assertEqual(4, len(split_evaluation_data))
        for item in split_evaluation_data.values:
            self.assertEqual('eval', item[1])
            self.assertTrue(item[0] in self.EVALUATION_VALUES, f' item {item[0]} not found.')

    def test_build_training_data(self):
        """Split using the training-side frame and explicit training categories."""
        builder = CategoricalDataSplitter(data_source='training', column_name='col2',
                                          training_categories=['train', 'train1'])

        training_model = DataModel(self.dataframe)
        self.arti.set_training_data(training_model)

        self.arti = builder.build(ml_model=self.arti)
        self._assert_standard_split()

    def test_build_evaluation_data(self):
        """Split using the evaluation-side frame as the data source."""
        builder = CategoricalDataSplitter(data_source='evaluation', column_name='col2',
                                          training_categories=['train', 'train1'])

        evaluation_model = DataModel(self.dataframe)
        self.arti.set_evaluation_data(evaluation_model)

        self.arti = builder.build(ml_model=self.arti)
        self._assert_standard_split()

    def test_evaluation_categories(self):
        """Split given only the evaluation categories (training is the rest)."""
        builder = CategoricalDataSplitter(data_source='training', column_name='col2',
                                          eval_categories=['eval'])

        training_model = DataModel(self.dataframe)
        self.arti.set_training_data(training_model)

        self.arti = builder.build(ml_model=self.arti)
        self._assert_standard_split()

    def test_both_categories(self):
        """Split with both category lists supplied and covering every row."""
        builder = CategoricalDataSplitter(data_source='training', column_name='col2',
                                          eval_categories=['eval'], training_categories=['train', 'train1'])

        training_model = DataModel(self.dataframe)
        self.arti.set_training_data(training_model)

        self.arti = builder.build(ml_model=self.arti)
        self._assert_standard_split()

    def test_missing_categories(self):
        """Building must fail when the category lists leave rows unassigned
        ('train1' is in neither list)."""
        builder = CategoricalDataSplitter(data_source='training', column_name='col2',
                                          eval_categories=['eval'], training_categories=['train'])

        training_model = DataModel(self.dataframe)
        self.arti.set_training_data(training_model)

        with self.assertRaises(AssertionError):
            self.arti = builder.build(ml_model=self.arti)

    def test_no_categories(self):
        """Building must fail when no category list is given at all."""
        builder = CategoricalDataSplitter(data_source='training', column_name='col2')

        training_model = DataModel(self.dataframe)
        self.arti.set_training_data(training_model)

        with self.assertRaises(AssertionError):
            self.arti = builder.build(ml_model=self.arti)