Example 1
def run(name, df, split_mask, acc_func, conf_func, target, train,
        learn_kwargs):
    import mindsdb_native
    import numpy as np

    train_indexes = []
    test_indexes = []
    validation_indexes = []
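    # Rows where the mask is True go to train/test (roughly 90/10 at random);
    # rows where the mask is False are held out as the validation set.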
    for i, ele in enumerate(split_mask):
        if ele:
            if np.random.rand() < 0.9:
                train_indexes.append(i)
            else:
                test_indexes.append(i)
        else:
            validation_indexes.append(i)

    predictor = mindsdb_native.Predictor(name=name)
    if train:
        predictor.learn(from_data=df,
                        to_predict=target,
                        advanced_args={
                            'data_split_indexes': {
                                'train_indexes': train_indexes,
                                'test_indexes': test_indexes,
                                'validation_indexes': validation_indexes
                            },
                            'deduplicate_data': False
                        },
                        **learn_kwargs)

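    # Predict on the held-out validation rows and score the predictions with
    # the provided accuracy (and optional confidence) functions.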
    df_test = predictor.transaction.input_data.validation_df
    predictions = predictor.predict(when_data=df_test)
    predictions = [x.explanation for x in predictions]

    accuracy = acc_func([x[target]['predicted_value'] for x in predictions],
                        df_test[target])

    confidence_accuracy = None
    if conf_func is not None:
        try:
            # Some confidence functions also take the accuracy function...
            confidence_accuracy = conf_func([x[target] for x in predictions],
                                            list(df_test[target]), acc_func)
        except TypeError:
            # ...others only take the predictions and the real values.
            confidence_accuracy = conf_func([x[target] for x in predictions],
                                            list(df_test[target]))

    return accuracy, confidence_accuracy
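
A hedged invocation sketch for the run() helper above; the dataset path, target column name, and accuracy helper below are illustrative assumptions, not part of the original example:

import numpy as np
import pandas as pd

def simple_accuracy(predicted, real):
    # Fraction of exact matches between predicted and real values
    return float(np.mean(np.array(predicted) == np.array(real)))

df = pd.read_csv('some_dataset.csv')        # hypothetical dataset
split_mask = np.random.rand(len(df)) < 0.8  # True -> train/test pool, False -> validation

accuracy, confidence_accuracy = run(
    name='example_predictor',
    df=df,
    split_mask=split_mask,
    acc_func=simple_accuracy,
    conf_func=None,                         # no confidence metric in this sketch
    target='label',                         # hypothetical target column
    train=True,
    learn_kwargs={}
)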
Example 2
    def run(self):
        '''
        Run in a subprocess because of:
        ValueError: signal only works in main thread

        Will this work for a celery worker here?
        '''
        import mindsdb_native

        config = Config()
        fs_store = FsSotre()
        company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        name, from_data, to_predict, kwargs, datasource_id = self._args

        mdb = mindsdb_native.Predictor(name=name,
                                       run_env={'trigger': 'mindsdb'})

        predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                     name=name).first()
        predictor_record.datasource_id = datasource_id
        predictor_record.to_predict = to_predict
        predictor_record.version = mindsdb_native.__version__
        predictor_record.data = {'name': name, 'status': 'training'}
        #predictor_record.datasource_id = ... <-- can be done once `learn` is passed a datasource name
        session.commit()

        to_predict = to_predict if isinstance(to_predict,
                                              list) else [to_predict]
        data_source = getattr(mindsdb_native,
                              from_data['class'])(*from_data['args'],
                                                  **from_data['kwargs'])
        try:
            mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)
        except Exception:
            # Training errors are silently swallowed; the code below still
            # tries to persist and register whatever model data exists.
            pass

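        # Push the trained predictor's files to the filesystem store, then
        # refresh the predictor record with the resulting model data.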
        fs_store.put(name, f'predictor_{company_id}_{predictor_record.id}',
                     config['paths']['predictors'])

        model_data = mindsdb_native.F.get_model_data(name)

        predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                     name=name).first()
        predictor_record.data = model_data
        session.commit()

        DatabaseWrapper().register_predictors([model_data])
Example 3
    def predict(self, name, when_data=None, kwargs={}):
        # @TODO Separate into two paths, one for "normal" predictions and one
        # for "real time" predictions. Use the multiprocessing code commented
        # out below for normal predictions (once we figure out how to return
        # the prediction object); otherwise use the inline code, but with the
        # "real time" predict functionality of mindsdb_native that will be
        # implemented later.
        '''
        from_data = when if when is not None else when_data
        p = PredictorProcess(name, from_data, to_predict=None, kwargs=kwargs, config=self.config.get_all(), 'predict')
        p.start()
        predictions = p.join()
        '''
        mdb = mindsdb_native.Predictor(name=name)

        # Only run the confidence variation analysis for single-row inputs
        run_rcva = not isinstance(when_data, list) or len(when_data) == 1
        predictions = mdb.predict(when_data=when_data,
                                  run_confidence_variation_analysis=run_rcva,
                                  **kwargs)

        return predictions
Example 4
    def predict(self, name, pred_format, when_data=None, kwargs={}):
        import mindsdb_datasources
        from mindsdb_datasources import FileDS, ClickhouseDS, MariaDS, MySqlDS, PostgresDS, MSSQLDS, MongoDS, SnowflakeDS, AthenaDS
        import mindsdb_native
        from mindsdb.interfaces.storage.db import session, Predictor

        create_process_mark('predict')

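        # Lazily load the predictor into the in-memory cache; if available
        # RAM is running low, the whole cache is cleared first.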
        if name not in self.predictor_cache:
            # Clear the cache entirely if we have less than 1.2 GB left
            if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                self.predictor_cache = {}

            predictor_record = Predictor.query.filter_by(company_id=self.company_id, name=name, is_custom=False).first()
            if predictor_record.data['status'] == 'complete':
                self.fs_store.get(name, f'predictor_{self.company_id}_{predictor_record.id}', self.config['paths']['predictors'])
                self.predictor_cache[name] = {
                    'predictor': mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'}),
                    'created': datetime.datetime.now()
                }

        if isinstance(when_data, dict) and 'kwargs' in when_data and 'args' in when_data:
            data_source = getattr(mindsdb_datasources, when_data['class'])(*when_data['args'], **when_data['kwargs'])
        else:
            # @TODO: Replace with Datasource
            try:
                data_source = pd.DataFrame(when_data)
            except Exception:
                data_source = when_data

        predictions = self.predictor_cache[name]['predictor'].predict(
            when_data=data_source,
            **kwargs
        )
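        # Convert the prediction objects into the requested output format.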
        if pred_format == 'explain' or pred_format == 'new_explain':
            predictions = [p.explain() for p in predictions]
        elif pred_format == 'dict':
            predictions = [p.as_dict() for p in predictions]
        elif pred_format == 'dict&explain':
            predictions = [[p.as_dict() for p in predictions], [p.explain() for p in predictions]]
        else:
            delete_process_mark('predict')
            raise Exception(f'Unknown predictions format: {pred_format}')

        delete_process_mark('predict')
        return self._pack(predictions)
Example 5
    def run(self):
        '''
        Run in a subprocess because of:
        ValueError: signal only works in main thread

        Will this work for a celery worker here?
        '''
        import mindsdb_native

        name, from_data, to_predict, kwargs, config, trx_type = self._args

        mdb = mindsdb_native.Predictor(name=name)

        if trx_type == 'learn':
            to_predict = to_predict if isinstance(to_predict,
                                                  list) else [to_predict]
            data_source = getattr(mindsdb_native,
                                  from_data['class'])(*from_data['args'],
                                                      **from_data['kwargs'])
            mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

            stats = mindsdb_native.F.get_model_data(name)['data_analysis_v2']

            DatabaseWrapper(config).register_predictors(
                [{
                    'name': name,
                    'predict': to_predict,
                    'data_analysis': stats
                }],
                setup=False
            )

        if trx_type == 'predict':
            if isinstance(from_data, dict):
                when_data = from_data
            else:
                when_data = getattr(mindsdb_native,
                                    from_data['class'])(*from_data['args'],
                                                        **from_data['kwargs'])

            predictions = mdb.predict(when_data=when_data,
                                      run_confidence_variation_analysis=True,
                                      **kwargs)

            # @TODO Figure out a way to recover this since we are using `spawn`
            # here... a simple Queue, or instantiating a multiprocessing Manager
            # and registering a value in a dict through it. Or using map from a
            # multiprocessing pool with a single process (though using a custom
            # process there might be its own bucket of annoying).
            return predictions
Example 6
    def predict(self, name, when_data=None, kwargs={}):
        if name not in self.predictor_cache:
            # Clear the cache entirely if we have less than 1.2 GB left
            if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                self.predictor_cache = {}

            if F.get_model_data(name)['status'] == 'complete':
                self.predictor_cache[name] = {
                    'predictor': mindsdb_native.Predictor(
                        name=name, run_env={'trigger': 'mindsdb'}),
                    'created': datetime.datetime.now()
                }

        predictions = self.predictor_cache[name]['predictor'].predict(
            when_data=when_data, **kwargs)

        return predictions
Example 7
    def run(self):
        '''
        Run in a subprocess because of:
        ValueError: signal only works in main thread

        Will this work for a celery worker here?
        '''
        import mindsdb_native

        name, from_data, to_predict, kwargs, config = self._args
        mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

        to_predict = to_predict if isinstance(to_predict, list) else [to_predict]
        data_source = getattr(mindsdb_native, from_data['class'])(*from_data['args'], **from_data['kwargs'])
        mdb.learn(
            from_data=data_source,
            to_predict=to_predict,
            **kwargs
        )

        model_data = mindsdb_native.F.get_model_data(name)

        DatabaseWrapper(config).register_predictors([model_data])
Example 8
    def __init__(self, config):
        self.config = config
        self.metapredictor = mindsdb_native.Predictor('metapredictor')
        self.unregister_from = []

        try:
            assert config['integrations']['default_clickhouse']['enabled'] == True
            from mindsdb.interfaces.clickhouse.clickhouse import Clickhouse
            clickhouse = Clickhouse(self.config)
            self.unregister_from.append(clickhouse)
        except Exception as e:
            print(e)

        try:
            assert config['integrations']['default_mariadb']['enabled'] == True
            from mindsdb.interfaces.mariadb.mariadb import Mariadb
            mariadb = Mariadb(self.config)
            self.unregister_from.append(mariadb)
        except Exception as e:
            print(e)
Example 9
import mindsdb_native

model = mindsdb_native.Predictor(name='wine_model')
predictions = model.predict(when_data='wine_data_predict.tsv')

for index, prediction in enumerate(predictions):
    Cultivar = prediction['Cultivar']
    Cultivar_confidence = prediction['Cultivar_confidence']
    print(f'Predicted cultivar {Cultivar} (confidence: {Cultivar_confidence}) '
          f'for row with index {index}')
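
A hedged sketch of the training step that would have produced 'wine_model' for the snippet above; the training file name is an assumption, while the 'Cultivar' target is taken from the prediction code:

import mindsdb_native

# Train a predictor with the same name, targeting the column read back above
model = mindsdb_native.Predictor(name='wine_model')
model.learn(from_data='wine_data_train.tsv', to_predict='Cultivar')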
Example 10
    def create(self, name):
        self._setup_for_creation(name)
        predictor = mindsdb_native.Predictor(name=name,
                                             run_env={'trigger': 'mindsdb'})
        return predictor
Example 11
    def run(self):
        '''
        Run in a subprocess because of:
        ValueError: signal only works in main thread

        Will this work for a celery worker here?
        '''
        import sys
        import mindsdb_native

        from mindsdb.utilities.config import Config

        name, from_data, to_predict, kwargs, config, trx_type = self._args
        config = Config(config)

        mdb = mindsdb_native.Predictor(name=name)

        if trx_type == 'learn':
            data_source = getattr(mindsdb_native,
                                  from_data['class'])(*from_data['args'],
                                                      **from_data['kwargs'])

            kwargs['use_gpu'] = config.get('use_gpu', None)
            mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

            stats = mdb.get_model_data()['data_analysis_v2']

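            # Register the newly trained predictor with whichever database
            # integrations are enabled in the config.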
            try:
                assert config['integrations']['default_clickhouse']['enabled'] == True
                from mindsdb.interfaces.clickhouse.clickhouse import Clickhouse
                clickhouse = Clickhouse(config)
                clickhouse.register_predictor(name, stats)
            except Exception:
                pass

            try:
                assert config['integrations']['default_mariadb']['enabled'] == True
                from mindsdb.interfaces.mariadb.mariadb import Mariadb
                mariadb = Mariadb(config)
                mariadb.register_predictor(name, stats)
            except Exception:
                pass

        if trx_type == 'predict':
            if isinstance(from_data, dict):
                when = from_data
                when_data = None
            else:
                when_data = getattr(mindsdb_native,
                                    from_data['class'])(*from_data['args'],
                                                        **from_data['kwargs'])
                when = None

            kwargs['use_gpu'] = config.get('use_gpu', None)

            predictions = mdb.predict(when=when,
                                      when_data=when_data,
                                      run_confidence_variation_analysis=True,
                                      **kwargs)

            return predictions