def run(name, df, split_mask, acc_func, conf_func, target, train, learn_kwargs):
    '''
    Optionally train a mindsdb_native predictor on an explicit split and score it.

    Every position in ``split_mask`` with a truthy flag is assigned at random
    to the train index list (``np.random.rand() < 0.9``) or the test index
    list; positions with a falsy flag go to the validation index list. The
    three lists are handed to ``learn`` through ``data_split_indexes``.

    Args:
        name: Name passed to ``mindsdb_native.Predictor``.
        df: Data passed to ``learn`` as ``from_data``.
        split_mask: Iterable of flags (presumably one per row of ``df`` --
            confirm against the caller).
        acc_func: Called as ``acc_func(predicted_values, real_values)``.
        conf_func: Optional. Called as
            ``conf_func(target_explanations, real_values, acc_func)`` and, if
            that raises, retried without ``acc_func``.
        target: Column to predict; also the key read from each explanation.
        train: When truthy, ``learn`` is called before predicting.
        learn_kwargs: Extra keyword arguments forwarded to ``learn``.

    Returns:
        Tuple ``(accuracy, confidence_accuracy)``; the second item is ``None``
        when ``conf_func`` is ``None``.
    '''
    import mindsdb_native
    import numpy as np

    train_indexes = []
    test_indexes = []
    validation_indexes = []
    for i, ele in enumerate(split_mask):
        if not ele:
            validation_indexes.append(i)
        elif np.random.rand() < 0.9:
            train_indexes.append(i)
        else:
            test_indexes.append(i)

    predictor = mindsdb_native.Predictor(name=name)
    # NOTE(review): the source formatting was collapsed; only the `learn` call
    # is assumed to be guarded by `train` -- confirm against the original file.
    if train:
        predictor.learn(
            from_data=df,
            to_predict=target,
            advanced_args={
                'data_split_indexes': {
                    'train_indexes': train_indexes,
                    'test_indexes': test_indexes,
                    'validation_indexes': validation_indexes
                },
                'deduplicate_data': False
            },
            **learn_kwargs
        )

    df_test = predictor.transaction.input_data.validation_df
    predictions = predictor.predict(when_data=df_test)
    predictions = [x.explanation for x in predictions]

    accuracy = acc_func(
        [x[target]['predicted_value'] for x in predictions],
        df_test[target]
    )

    confidence_accuracy = None
    if conf_func is not None:
        try:
            confidence_accuracy = conf_func(
                [x[target] for x in predictions], list(df_test[target]), acc_func
            )
        except Exception:
            # Fall back to the two-argument form. `Exception` (instead of a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            confidence_accuracy = conf_func(
                [x[target] for x in predictions], list(df_test[target])
            )

    return accuracy, confidence_accuracy
def run(self):
    '''
    Train the predictor described by ``self._args`` and persist the outcome.

    Original note: this runs in a subprocess because of
    "ValueError: signal only works in main thread"; whether that also holds
    for a celery worker was left as an open question.

    Steps: mark the ``Predictor`` record as ``training``, call
    ``mindsdb_native.Predictor.learn``, upload the predictor through
    ``FsSotre.put``, store ``F.get_model_data(name)`` on the record and
    register the model through ``DatabaseWrapper``.

    A failure inside ``learn`` is logged and does not abort the remaining
    steps (best-effort behaviour kept from the original code).
    '''
    import logging

    import mindsdb_native

    config = Config()
    fs_store = FsSotre()
    company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
    name, from_data, to_predict, kwargs, datasource_id = self._args

    mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

    predictor_record = Predictor.query.filter_by(company_id=company_id, name=name).first()
    predictor_record.datasource_id = datasource_id
    predictor_record.to_predict = to_predict
    predictor_record.version = mindsdb_native.__version__
    predictor_record.data = {'name': name, 'status': 'training'}
    # predictor_record.datasource_id = ... <-- can be done once `learn` is passed a datasource name
    session.commit()

    to_predict = to_predict if isinstance(to_predict, list) else [to_predict]
    data_source = getattr(mindsdb_native, from_data['class'])(*from_data['args'], **from_data['kwargs'])

    try:
        mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)
    except Exception:
        # Previously swallowed silently; keep going so the record below still
        # receives whatever model data exists, but leave a trace of the failure.
        logging.getLogger(__name__).exception('Training of predictor %s failed', name)

    fs_store.put(name, f'predictor_{company_id}_{predictor_record.id}', config['paths']['predictors'])

    model_data = mindsdb_native.F.get_model_data(name)

    # Re-query the record before writing the final model data.
    predictor_record = Predictor.query.filter_by(company_id=company_id, name=name).first()
    predictor_record.data = model_data
    session.commit()

    DatabaseWrapper().register_predictors([model_data])
def predict(self, name, when_data=None, kwargs=None):
    '''
    Predict with the mindsdb_native predictor called ``name``.

    Args:
        name: Name passed to ``mindsdb_native.Predictor``.
        when_data: Forwarded to ``Predictor.predict`` as ``when_data``.
        kwargs: Optional dict of extra keyword arguments for
            ``Predictor.predict``; ``None`` means no extra arguments.

    Returns:
        Whatever ``Predictor.predict`` returns.
    '''
    # @TODO Separate into two paths, one for "normal" predictions and one for
    # "real time" predictions. Use the multiprocessing code commented out below
    # for normal (once we figure out how to return the prediction object...
    # else use the inline code but with the "real time" predict functionality
    # of mindsdb_native that will be implemented later)
    #
    #   from_data = when if when is not None else when_data
    #   p = PredictorProcess(name, from_data, to_predict=None, kwargs=kwargs, config=self.config.get_all(), 'predict')
    #   p.start()
    #   predictions = p.join()
    kwargs = {} if kwargs is None else kwargs

    mdb = mindsdb_native.Predictor(name=name)

    # The analysis is enabled unless `when_data` is a list with a length other than 1.
    run_analysis = not isinstance(when_data, list) or len(when_data) == 1

    predictions = mdb.predict(
        when_data=when_data,
        run_confidence_variation_analysis=run_analysis,
        **kwargs
    )
    return predictions
def predict(self, name, pred_format, when_data=None, kwargs=None):
    '''
    Predict with a cached predictor and return the packed result.

    Args:
        name: Predictor name; also the key into ``self.predictor_cache``.
        pred_format: One of ``'explain'``, ``'new_explain'``, ``'dict'`` or
            ``'dict&explain'``.
        when_data: Either a dict containing ``'class'``, ``'args'`` and
            ``'kwargs'`` (instantiated as a ``mindsdb_datasources`` class), or
            data that is converted with ``pd.DataFrame`` when possible and
            passed through unchanged otherwise.
        kwargs: Optional dict of extra keyword arguments for the predictor's
            ``predict``; ``None`` means no extra arguments.

    Returns:
        ``self._pack(predictions)`` where ``predictions`` is shaped according
        to ``pred_format``.

    Raises:
        Exception: If ``pred_format`` is not one of the supported values.
    '''
    import mindsdb_datasources
    from mindsdb_datasources import FileDS, ClickhouseDS, MariaDS, MySqlDS, PostgresDS, MSSQLDS, MongoDS, SnowflakeDS, AthenaDS
    import mindsdb_native
    from mindsdb.interfaces.storage.db import session, Predictor

    kwargs = {} if kwargs is None else kwargs

    create_process_mark('predict')
    try:
        if name not in self.predictor_cache:
            # Clear the cache entirely if we have less than 1.2 GB left
            if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                self.predictor_cache = {}

            predictor_record = Predictor.query.filter_by(
                company_id=self.company_id, name=name, is_custom=False
            ).first()
            if predictor_record.data['status'] == 'complete':
                self.fs_store.get(
                    name,
                    f'predictor_{self.company_id}_{predictor_record.id}',
                    self.config['paths']['predictors']
                )
                self.predictor_cache[name] = {
                    'predictor': mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'}),
                    'created': datetime.datetime.now()
                }

        if isinstance(when_data, dict) and 'kwargs' in when_data and 'args' in when_data:
            data_source = getattr(mindsdb_datasources, when_data['class'])(*when_data['args'], **when_data['kwargs'])
        else:
            # @TODO: Replace with Datasource
            try:
                data_source = pd.DataFrame(when_data)
            except Exception:
                data_source = when_data

        # Pass the prepared data source; the previous code built it and then
        # handed the raw `when_data` to the predictor instead.
        predictions = self.predictor_cache[name]['predictor'].predict(
            when_data=data_source,
            **kwargs
        )

        if pred_format == 'explain' or pred_format == 'new_explain':
            predictions = [p.explain() for p in predictions]
        elif pred_format == 'dict':
            predictions = [p.as_dict() for p in predictions]
        elif pred_format == 'dict&explain':
            predictions = [[p.as_dict() for p in predictions], [p.explain() for p in predictions]]
        else:
            raise Exception(f'Unkown predictions format: {pred_format}')
    finally:
        # Always clear the mark, including when the lookup or prediction raises.
        delete_process_mark('predict')

    return self._pack(predictions)
def run(self):
    '''
    Execute the 'learn' or 'predict' transaction described by ``self._args``.

    Original note: this runs in a subprocess because of
    "ValueError: signal only works in main thread"; whether that also holds
    for a celery worker was left as an open question.

    ``self._args`` unpacks to
    ``(name, from_data, to_predict, kwargs, config, trx_type)``.
    '''
    import mindsdb_native

    name, from_data, to_predict, kwargs, config, trx_type = self._args
    mdb = mindsdb_native.Predictor(name=name)

    if trx_type == 'learn':
        if not isinstance(to_predict, list):
            to_predict = [to_predict]

        source_cls = getattr(mindsdb_native, from_data['class'])
        data_source = source_cls(*from_data['args'], **from_data['kwargs'])
        mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

        stats = mindsdb_native.F.get_model_data(name)['data_analysis_v2']
        predictor_info = {
            'name': name,
            'predict': to_predict,
            'data_analysis': stats
        }
        DatabaseWrapper(config).register_predictors([predictor_info], setup=False)

    if trx_type == 'predict':
        if isinstance(from_data, dict):
            when_data = from_data
        else:
            # NOTE(review): this branch indexes a non-dict `from_data` by key;
            # confirm what callers actually pass here.
            source_cls = getattr(mindsdb_native, from_data['class'])
            when_data = source_cls(*from_data['args'], **from_data['kwargs'])

        predictions = mdb.predict(
            when_data=when_data,
            run_confidence_variation_analysis=True,
            **kwargs
        )

        # @TODO Figure out a way to recover this since we are using `spawn`
        # here... simple Queue or instantiating a Multiprocessing manager and
        # registering a value in a dict using that. Or using map from a
        # multiprocessing pool with 1x process (though using a custom process
        # there might be its own bucket of annoying)
        return predictions
def predict(self, name, when_data=None, kwargs=None):
    '''
    Predict with the predictor cached under ``name``, loading it if needed.

    On a cache miss the whole cache is dropped first when available memory is
    below 1.2 GB, and the predictor is cached only if
    ``F.get_model_data(name)['status']`` is ``'complete'``.

    Args:
        name: Predictor name; also the key into ``self.predictor_cache``.
        when_data: Forwarded to the predictor's ``predict`` as ``when_data``.
        kwargs: Optional dict of extra keyword arguments for the predictor's
            ``predict``; ``None`` means no extra arguments.

    Returns:
        Whatever the predictor's ``predict`` returns.

    Raises:
        KeyError: From the cache lookup when the predictor was not cached
            (assuming ``self.predictor_cache`` is a plain dict).
    '''
    kwargs = {} if kwargs is None else kwargs

    if name not in self.predictor_cache:
        # Clear the cache entirely if we have less than 1.2 GB left
        if psutil.virtual_memory().available < 1.2 * pow(10, 9):
            self.predictor_cache = {}

        if F.get_model_data(name)['status'] == 'complete':
            self.predictor_cache[name] = {
                'predictor': mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'}),
                'created': datetime.datetime.now()
            }

    predictions = self.predictor_cache[name]['predictor'].predict(
        when_data=when_data,
        **kwargs
    )
    return predictions
def run(self):
    '''
    Train the predictor described by ``self._args`` and register the model.

    Original note: this runs in a subprocess because of
    "ValueError: signal only works in main thread"; whether that also holds
    for a celery worker was left as an open question.

    ``self._args`` unpacks to ``(name, from_data, to_predict, kwargs, config)``.
    '''
    import mindsdb_native

    name, from_data, to_predict, kwargs, config = self._args

    mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

    # `learn` is always handed a list of targets.
    if not isinstance(to_predict, list):
        to_predict = [to_predict]

    source_cls = getattr(mindsdb_native, from_data['class'])
    data_source = source_cls(*from_data['args'], **from_data['kwargs'])

    mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

    model_data = mindsdb_native.F.get_model_data(name)
    DatabaseWrapper(config).register_predictors([model_data])
def __init__(self, config):
    '''
    Store the config, create the 'metapredictor' and collect enabled integrations.

    For each of ``default_clickhouse`` and ``default_mariadb`` whose
    ``enabled`` setting equals ``True``, the integration class is imported
    lazily, instantiated with the config and appended to
    ``self.unregister_from``. Any error while doing so (missing config key,
    import failure, constructor failure) is printed and otherwise ignored.

    Args:
        config: Configuration object supporting nested ``[]`` access.
    '''
    self.config = config
    self.metapredictor = mindsdb_native.Predictor('metapredictor')
    self.unregister_from = []

    # An explicit `if` replaces the former `assert`, which is stripped under
    # `python -O` and would then enable the integration unconditionally.
    try:
        if config['integrations']['default_clickhouse']['enabled'] == True:  # noqa: E712
            from mindsdb.interfaces.clickhouse.clickhouse import Clickhouse
            clickhouse = Clickhouse(self.config)
            self.unregister_from.append(clickhouse)
    except Exception as e:
        print(e)

    try:
        if config['integrations']['default_mariadb']['enabled'] == True:  # noqa: E712
            from mindsdb.interfaces.mariadb.mariadb import Mariadb
            mariadb = Mariadb(self.config)
            self.unregister_from.append(mariadb)
    except Exception as e:
        print(e)
import mindsdb_native

# Open the predictor named 'wine_model' and predict for the rows of the TSV file.
model = mindsdb_native.Predictor(name='wine_model')
predictions = model.predict(when_data='wine_data_predict.tsv')

for row_index, row_prediction in enumerate(predictions):
    cultivar = row_prediction['Cultivar']
    # Read alongside the prediction; not printed below.
    cultivar_confidence = row_prediction['Cultivar_confidence']
    print(f'Predicted cultivar {cultivar} for row with index {row_index}')
def create(self, name):
    '''
    Call ``self._setup_for_creation(name)`` and return a new predictor.

    Args:
        name: Name passed both to the setup step and to
            ``mindsdb_native.Predictor``.

    Returns:
        A ``mindsdb_native.Predictor`` created with
        ``run_env={'trigger': 'mindsdb'}``.
    '''
    self._setup_for_creation(name)
    return mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})
def run(self):
    '''
    Execute the 'learn' or 'predict' transaction described by ``self._args``.

    Original note: this runs in a subprocess because of
    "ValueError: signal only works in main thread"; whether that also holds
    for a celery worker was left as an open question.

    ``self._args`` unpacks to
    ``(name, from_data, to_predict, kwargs, config, trx_type)``.

    After a 'learn' transaction the model statistics are registered with the
    ClickHouse and MariaDB integrations whose ``enabled`` setting equals
    ``True``. Registration is best-effort: failures are logged as warnings
    and do not abort the run.
    '''
    import logging

    import mindsdb_native
    from mindsdb.utilities.config import Config

    name, from_data, to_predict, kwargs, config, trx_type = self._args
    config = Config(config)
    log = logging.getLogger(__name__)

    mdb = mindsdb_native.Predictor(name=name)

    if trx_type == 'learn':
        data_source = getattr(mindsdb_native, from_data['class'])(*from_data['args'], **from_data['kwargs'])

        kwargs['use_gpu'] = config.get('use_gpu', None)
        mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

        stats = mdb.get_model_data()['data_analysis_v2']

        # Explicit `if` checks replace the former `assert`s, which are stripped
        # under `python -O` and would then register with disabled integrations.
        try:
            if config['integrations']['default_clickhouse']['enabled'] == True:  # noqa: E712
                from mindsdb.interfaces.clickhouse.clickhouse import Clickhouse
                clickhouse = Clickhouse(config)
                clickhouse.register_predictor(name, stats)
        except Exception as e:
            log.warning('Could not register predictor %s with clickhouse: %r', name, e)

        try:
            if config['integrations']['default_mariadb']['enabled'] == True:  # noqa: E712
                from mindsdb.interfaces.mariadb.mariadb import Mariadb
                mariadb = Mariadb(config)
                mariadb.register_predictor(name, stats)
        except Exception as e:
            log.warning('Could not register predictor %s with mariadb: %r', name, e)

    if trx_type == 'predict':
        if isinstance(from_data, dict):
            when = from_data
            when_data = None
        else:
            # NOTE(review): this branch indexes a non-dict `from_data` by key;
            # confirm what callers actually pass here.
            when_data = getattr(mindsdb_native, from_data['class'])(*from_data['args'], **from_data['kwargs'])
            when = None

        kwargs['use_gpu'] = config.get('use_gpu', None)

        predictions = mdb.predict(
            when=when,
            when_data=when_data,
            run_confidence_variation_analysis=True,
            **kwargs
        )
        return predictions