def unregister_predictor(self, name):
    for integration in self._get_integrations():
        if integration.check_connection():
            integration.unregister_predictor(name)
        else:
            logger.warning(
                f"There is no connection to {integration.name}. The predictor won't be unregistered."
            )
def setup_integration(self, db_alias):
    try:
        # If this is the name of an integration
        integration = self._get_integration(db_alias)
        if integration is not True:
            integration.setup()
    except Exception as e:
        # Report the alias rather than integration.name: `integration` may be
        # unbound (or a bool) when _get_integration itself failed
        logger.warning(f'Failed to integrate with database {db_alias}, error: {e}')
def register_predictors(self, model_data_arr):
    for integration in self._get_integrations():
        if integration.check_connection():
            integration.register_predictors(model_data_arr)
        else:
            logger.warning(
                f"There is no connection to {integration.name}. The predictor won't be registered."
            )
def _get_integration(self, db_alias):
    integration = self.integration_controller.get(db_alias)
    if integration:
        db_type = integration['type']
        if db_type in self.known_dbs:
            return self.known_dbs[db_type](self.config, db_alias, integration)
        logger.warning(f'Unknown integration type: {db_type} for database called: {db_alias}')
        return False
    return True
def register_predictors(self, model_data_arr):
    for integration in self._get_integrations():
        if integration.check_connection():
            try:
                integration.register_predictors(model_data_arr)
            except Exception as e:
                logger.warning(f"Error {e} when trying to register predictor to {integration.name}. The predictor won't be registered.")
        else:
            logger.warning(f"There is no connection to {integration.name}. The predictor won't be registered.")
def _get_integration(self, db_alias):
    if self.config['integrations'][db_alias]['publish']:
        db_type = self.config['integrations'][db_alias]['type']
        if db_type in self.known_dbs:
            return self.known_dbs[db_type](self.config, db_alias)
        logger.warning(
            f'Unknown integration type: {db_type} for database called: {db_alias}'
        )
        return False
    return True
def setup_integration(self, db_alias):
    try:
        # If this is the name of an integration
        integration = self._get_integration(db_alias)
        if integration is False:
            raise Exception(f'Unknown database integration type for: {db_alias}')
        if integration is not True:
            integration.setup()
    except Exception as e:
        logger.warning(f'Failed to integrate with database {db_alias}, error: {e}')
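# _get_integration returns a tri-state value that both versions of
# setup_integration branch on. A minimal standalone sketch of that contract
# (`interpret` is an illustrative name, not a function from the codebase):
def interpret(result):
    if result is False:
        # published, but of an unknown type -> setup_integration raises
        return 'unknown integration type'
    if result is True:
        # the alias is not a published integration -> nothing to set up
        return 'not a published integration'
    # any other value is a handler instance -> .setup() gets called on it
    return f'handler: {result!r}'

assert interpret(True) == 'not a published integration'
assert interpret(False) == 'unknown integration type'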
def unregister_predictor(self, name):
    for integration in self._get_integrations(publish=True):
        # FIXME
        # !!! Integrations from config.json are added to the db on each start !!!
        if '@@@@@' in name:
            sn = name.split('@@@@@')
            assert len(sn) < 3  # security
            name = sn[1]
        if integration.check_connection():
            integration.unregister_predictor(name)
        else:
            logger.warning(f"There is no connection to {integration.name}. The predictor won't be unregistered.")
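# The '@@@@@' split above relies on the naming convention used by predict()
# below, which namespaces predictors as f'{company_id}@@@@@{name}'. A
# hypothetical round-trip sketch of that convention (qualify/unqualify are
# illustrative names, not functions from the codebase):
def qualify(company_id: int, name: str) -> str:
    return f'{company_id}@@@@@{name}'

def unqualify(qualified: str) -> str:
    parts = qualified.split('@@@@@')
    assert len(parts) < 3  # security: reject names with extra separators
    return parts[-1]

assert unqualify(qualify(1, 'home_rentals')) == 'home_rentals'
assert unqualify('home_rentals') == 'home_rentals'  # plain names pass through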
def register_predictors(self, model_data_arr, setup=True):
    for integration in self._get_integrations():
        register = True
        if setup:
            register = self._setup_integration(integration)
        if register:
            if integration.check_connection():
                integration.register_predictors(model_data_arr)
            else:
                logger.warning(
                    f"There is no connection to {integration.name}. The predictor won't be registered."
                )
def register_predictors(self, model_data_arr, integration_name=None):
    if integration_name is None:
        integrations = self._get_integrations(publish=True)
    else:
        integration = self._get_integration(integration_name)
        # _get_integration returns a bool when the name doesn't resolve to a
        # usable handler, so only keep actual handler objects
        integrations = [] if isinstance(integration, bool) else [integration]
    for integration in integrations:
        if integration.check_connection():
            try:
                integration.register_predictors(model_data_arr)
            except Exception as e:
                logger.warning(f"Error {e} when trying to register predictor to {integration.name}. The predictor won't be registered.")
        else:
            logger.warning(f"There is no connection to {integration.name}. The predictor won't be registered.")
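# Hedged call sketch for the version above, assuming `controller` is an
# instance of the owning class and `model_data_arr` holds model-data dicts
# (e.g. from get_model_data); 'mariadb_prod' is an illustrative alias.

# Publish to every integration configured with publish=True:
controller.register_predictors(model_data_arr)

# Or target a single integration by alias; if the alias doesn't resolve to a
# handler (_get_integration returned a bool), the loop simply has nothing to do:
controller.register_predictors(model_data_arr, integration_name='mariadb_prod')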
def _get_integration(self, db_alias):
    if self.config['integrations'][db_alias]['publish']:
        db_type = self.config['integrations'][db_alias]['type']
        if db_type == 'clickhouse':
            return Clickhouse(self.config, db_alias)
        elif db_type == 'mariadb':
            return Mariadb(self.config, db_alias)
        elif db_type == 'mysql':
            return MySQL(self.config, db_alias)
        elif db_type == 'postgres':
            return PostgreSQL(self.config, db_alias)
        elif db_type == 'mssql':
            return MSSQL(self.config, db_alias)
        elif db_type == 'mongodb':
            return MongoDB(self.config, db_alias)
        else:
            logger.warning(f'Unknown integration type: {db_type} for database called: {db_alias}')
            return False
    return True
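# The known_dbs lookup in the other _get_integration versions is presumably
# the table-driven form of the chain above. A standalone sketch of that
# mapping, with stand-in classes so the snippet runs on its own (in the real
# module these would be the imported handler classes):
class Clickhouse: ...
class Mariadb: ...
class MySQL: ...
class PostgreSQL: ...
class MSSQL: ...
class MongoDB: ...

known_dbs = {
    'clickhouse': Clickhouse,
    'mariadb': Mariadb,
    'mysql': MySQL,
    'postgres': PostgreSQL,
    'mssql': MSSQL,
    'mongodb': MongoDB,
}

assert known_dbs['postgres'] is PostgreSQL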
def predict(self, name: str, when_data: Union[dict, list, pd.DataFrame], pred_format: str, company_id: int):
    original_name = name
    name = f'{company_id}@@@@@{name}'

    predictor_record = db.session.query(db.Predictor).filter_by(
        company_id=company_id, name=original_name).first()
    assert predictor_record is not None

    predictor_data = self.get_model_data(name, company_id)
    fs_name = f'predictor_{company_id}_{predictor_record.id}'

    # Invalidate the cached predictor if the stored record changed since it was loaded
    if (name in self.predictor_cache
            and self.predictor_cache[name]['updated_at'] != predictor_record.updated_at):
        del self.predictor_cache[name]

    if name not in self.predictor_cache:
        # Clear the cache entirely if we have less than 1.2 GB left
        if psutil.virtual_memory().available < 1.2 * pow(10, 9):
            self.predictor_cache = {}

        if predictor_data['status'] == 'complete':
            self.fs_store.get(fs_name, fs_name, self.config['paths']['predictors'])
            self.predictor_cache[name] = {
                'predictor': lightwood.predictor_from_state(
                    os.path.join(self.config['paths']['predictors'], fs_name),
                    predictor_record.code
                ),
                'updated_at': predictor_record.updated_at,
                'created': datetime.datetime.now(),
                'code': predictor_record.code,
                'pickle': str(os.path.join(self.config['paths']['predictors'], fs_name))
            }
        else:
            raise Exception(
                f'Trying to predict using predictor {original_name} with status: {predictor_data["status"]}. Error is: {predictor_data.get("error", "unknown")}'
            )

    if isinstance(when_data, dict) and 'kwargs' in when_data and 'args' in when_data:
        ds_cls = getattr(mindsdb_datasources, when_data['class'])
        df = ds_cls(*when_data['args'], **when_data['kwargs']).df
    else:
        if isinstance(when_data, dict):
            when_data = [when_data]
        df = pd.DataFrame(when_data)

    predictions = self.predictor_cache[name]['predictor'].predict(df)
    predictions = predictions.to_dict(orient='records')

    # Below is useful for debugging caching and storage issues
    # del self.predictor_cache[name]

    target = predictor_record.to_predict[0]
    if pred_format in ('explain', 'dict', 'dict&explain'):
        explain_arr = []
        dict_arr = []
        for i, row in enumerate(predictions):
            explain_arr.append({
                target: {
                    'predicted_value': row['prediction'],
                    'confidence': row.get('confidence', None),
                    'confidence_lower_bound': row.get('lower', None),
                    'confidence_upper_bound': row.get('upper', None),
                    'anomaly': row.get('anomaly', None),
                    'truth': row.get('truth', None)
                }
            })

            td = {'predicted_value': row['prediction']}
            for col in df.columns:
                if col in row:
                    td[col] = row[col]
                elif f'order_{col}' in row:
                    td[col] = row[f'order_{col}']
                elif f'group_{col}' in row:
                    td[col] = row[f'group_{col}']
                else:
                    original_index = row.get('original_index')
                    if original_index is None:
                        logger.warning('original_index is None')
                        original_index = i
                    td[col] = df.iloc[original_index][col]
            dict_arr.append({target: td})

        if pred_format == 'explain':
            return explain_arr
        elif pred_format == 'dict':
            return dict_arr
        elif pred_format == 'dict&explain':
            return dict_arr, explain_arr
    # New format -- try switching to this in 2-3 months for speed; for now the above is ok
    else:
        return predictions
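# Hedged usage sketch for predict(), assuming `model_interface` is an instance
# of the class above and a predictor named 'home_rentals' finished training
# for company 1 (both names are illustrative, not from the source):
when = [{'sqft': 900, 'neighborhood': 'downtown'}]  # a single dict also works

# 'explain' -> list of {target: {predicted_value, confidence, bounds, ...}}
explain_arr = model_interface.predict('home_rentals', when, 'explain', company_id=1)

# 'dict&explain' -> a (dict_arr, explain_arr) tuple
dict_arr, explain_arr = model_interface.predict('home_rentals', when, 'dict&explain', company_id=1)

# any other pred_format falls through to the raw prediction records
raw = model_interface.predict('home_rentals', when, 'raw', company_id=1)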