class DataSourceDataNode(DataNode):
    """Data node that exposes MindsDB datasources as queryable tables."""

    type = 'mindsdb-datasource'

    def __init__(self, config):
        self.config = config
        self.datastore = DataStore(config)

    def getTables(self):
        """Names of all datasources known to the datastore."""
        return [ds['name'] for ds in self.datastore.get_datasources()]

    def hasTable(self, table):
        """True when `table` is one of the datasource names."""
        return table in self.getTables()

    def getTableColumns(self, table):
        """Column names of the given datasource."""
        source = self.datastore.get_datasource(table)
        return [col['name'] for col in source['columns']]

    def select(self, table, columns=None, where=None, where_data=None, order_by=None, group_by=None, came_from=None):
        """Return all rows of the datasource.

        Note: the filter/order/group arguments are accepted for interface
        compatibility but are not applied here — the datastore is queried
        without a WHERE clause or limit.
        """
        result = self.datastore.get_data(table, where=None, limit=None, offset=None)
        return result['data']
class IntegrationDataNode(DataNode):
    """Data node that proxies SELECT queries to an external integration.

    Queries are executed by materialising a temporary datasource in the
    default DataStore and reading its dataframe.
    """

    type = 'integration'

    def __init__(self, config, integration_name):
        self.config = config
        self.integration_name = integration_name
        # DataStore used to create/drop the short-lived query datasources.
        self.default_store = DataStore()

    def getType(self):
        return self.type

    def getTables(self):
        # Integration tables are not enumerated here.
        return []

    def hasTable(self, tableName):
        # Optimistically claim every table; the integration resolves it at query time.
        return True

    def getTableColumns(self, tableName):
        # Columns are not known up-front for integration tables.
        return []

    def select(self, table=None, columns=None, where=None, where_data=None, order_by=None, group_by=None, came_from=None):
        """Run a SELECT against the integration and return the rows.

        `where` may arrive as a single condition dict or a list of them;
        it is normalised to {'and': [...]} before rendering the query.
        Returns a view of row dicts (dict_values).
        """
        if isinstance(where, dict):
            where = [where]
        if isinstance(where, list):
            for el in where:
                if isinstance(el, dict):
                    for key in el:
                        # Strip a leading "table." qualifier from string operands
                        # like ['tbl.col', ...] so the integration sees bare names.
                        if isinstance(el[key], list) and len(el[key]) > 0 and isinstance(el[key][0], str) and '.' in el[key][0]:
                            el[key][0] = el[key][0][el[key][0].find('.') + 1:]
            where = {'and': where}
        # `format` here is presumably a SQL-rendering helper imported elsewhere
        # in this module (not the builtin) — TODO confirm against the imports.
        query = format({"from": table, 'select': columns, "where": where})
        # Unique temp name; time.time()*100 gives centisecond resolution.
        ds, ds_name = self.default_store.save_datasource(
            f'temp_ds_{int(time.time()*100)}', self.integration_name, {'query': query})
        dso = self.default_store.get_datasource_obj(ds_name)
        # Transpose + to_dict yields {row_index: row_dict}; .values() is the rows.
        data = dso.df.T.to_dict().values()
        # Drop the temporary datasource before returning.
        self.default_store.delete_datasource(ds_name)
        return data
def run_environment(db, config):
    """Start the `db` test container (if needed) and a MindsDB MySQL API process.

    Registers atexit cleanup for both. Returns (mdb, datastore) on success;
    raises Exception with a diagnostic message if either fails to come up
    within the timeout.
    """
    DEFAULT_DB = f'default_{db}'
    temp_config_path = prepare_config(config, DEFAULT_DB)

    if is_container_run(f'{db}-test') is False:
        subprocess.Popen(
            ['./cli.sh', db],
            cwd=TESTS_ROOT.joinpath('docker/').resolve(),
            stdout=OUTPUT,
            stderr=OUTPUT
        )
        atexit.register(stop_container, name=db)
    db_ready = wait_db(config, DEFAULT_DB)

    if db_ready:
        sp = subprocess.Popen(
            ['python3', '-m', 'mindsdb', '--api', 'mysql', '--config', temp_config_path],
            stdout=OUTPUT,
            stderr=OUTPUT
        )
        atexit.register(stop_mindsdb, sp=sp)

    api_ready = db_ready and wait_api_ready(config)
    if db_ready is False or api_ready is False:
        # Carry the diagnostic in the exception itself instead of a bare
        # Exception() whose message is only visible in stdout.
        msg = f'Failed by timeout. {db} started={db_ready}, MindsDB started={api_ready}'
        print(msg)
        raise Exception(msg)

    mdb = MindsdbNative(config)
    datastore = DataStore(config)
    return mdb, datastore
def initialize_interfaces(app):
    """Attach shared service singletons to the application object."""
    app.default_store = DataStore()
    # NOTE(review): 'naitve_interface' looks like a typo for 'native_interface',
    # but other code may read this exact attribute name — confirm before renaming.
    app.naitve_interface = NativeInterface()
    app.custom_models = CustomModels()
    app.dbw = DatabaseWrapper()
    config = Config()
    app.config_obj = config
def startProxy(config):
    # Module-level singletons consumed by the request handler (MysqlProxy).
    global HARDCODED_USER
    global HARDCODED_PASSWORD
    global CERT_PATH
    global default_store
    global mdb
    global datahub
    """
    Create a server and wait for incoming connections until Ctrl-C
    """
    init_logger(config)

    # Credentials and TLS certificate come from the mysql API section of config.
    HARDCODED_USER = config['api']['mysql']['user']
    HARDCODED_PASSWORD = config['api']['mysql']['password']
    CERT_PATH = config['api']['mysql']['certificate_path']

    default_store = DataStore(config)
    mdb = MindsdbNative(config)
    datahub = init_datahub(config)

    host = config['api']['mysql']['host']
    port = int(config['api']['mysql']['port'])

    log.info(f'Starting MindsDB Mysql proxy server on tcp://{host}:{port}')

    # Create the server
    if config['debug'] is True:
        # Allow fast restarts during development (skip TIME_WAIT).
        SocketServer.TCPServer.allow_reuse_address = True
    server = SocketServer.ThreadingTCPServer((host, port), MysqlProxy)
    atexit.register(MysqlProxy.server_close, srv=server)

    # Activate the server; this will keep running until you
    # interrupt the program with Ctrl-C
    log.info('Waiting for incoming connections...')
    server.serve_forever()
def delete_model(self, name, company_id: int):
    """Delete the named predictor for a company.

    Removes the DB record, best-effort deletes the linked non-file
    datasource, unregisters the predictor from the database wrapper and
    deletes its artifacts from remote storage. Returns 0 on success.
    Raises Exception when the predictor does not exist.
    """
    original_name = name
    # Internal storage name is company-scoped.
    name = f'{company_id}@@@@@{name}'

    db_p = db.session.query(db.Predictor).filter_by(
        company_id=company_id, name=original_name).first()
    if db_p is None:
        # FIX: report the user-facing name, not the internal mangled one.
        raise Exception(f"Predictor '{original_name}' does not exist")
    db.session.delete(db_p)
    if db_p.datasource_id is not None:
        try:
            dataset_record = db.Datasource.query.get(db_p.datasource_id)
            # Only auto-delete query-backed datasources; file uploads are kept.
            if (isinstance(dataset_record.data, str)
                    and json.loads(dataset_record.data).get('source_type') != 'file'):
                DataStore().delete_datasource(dataset_record.name, company_id)
        except Exception:
            # Best-effort cleanup; the predictor deletion must still proceed.
            pass
    db.session.commit()

    DatabaseWrapper(company_id).unregister_predictor(name)
    # delete from s3
    self.fs_store.delete(f'predictor_{company_id}_{db_p.id}')
    return 0
def __init__(self, name, predictor, stream_in, stream_out, anomaly_stream=None, learning_stream=None, learning_threshold=100):
    """Set up a stream controller for `predictor` and start its worker thread.

    Reads from `stream_in`, writes predictions to `stream_out` (anomalies to
    `anomaly_stream` when provided). When `learning_stream` is set, buffered
    records are used for adjustment once `learning_threshold` is reached.
    Raises Exception when the predictor does not exist.
    """
    self.name = name
    self.predictor = predictor
    self.stream_in = stream_in
    self.stream_out = stream_out
    self.anomaly_stream = anomaly_stream

    self.learning_stream = learning_stream
    self.learning_threshold = learning_threshold
    self.learning_data = []

    self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
    self.stop_event = Event()
    self.model_interface = ModelInterfaceWrapper(ModelInterface())
    self.data_store = DataStore()
    self.config = Config()

    p = db.session.query(db.Predictor).filter_by(
        company_id=self.company_id, name=self.predictor).first()
    if p is None:
        raise Exception(f'Predictor {predictor} doesn\'t exist')

    self.target = p.to_predict[0]

    ts_settings = p.learn_args.get('timeseries_settings', None)
    # FIX: the old code subscripted ts_settings['is_timeseries'] directly,
    # raising TypeError when learn_args has no 'timeseries_settings' key.
    if ts_settings is not None and not ts_settings.get('is_timeseries'):
        ts_settings = None

    if ts_settings is None:
        self.thread = Thread(target=StreamController._make_predictions, args=(self,))
    else:
        self.ts_settings = ts_settings
        self.thread = Thread(target=StreamController._make_ts_predictions, args=(self,))
    self.thread.start()
def run_update(name: str, company_id: int):
    """Retrain the named predictor from its stored datasource with lightwood.

    Regenerates JSON-AI and code from the stored problem definition, retrains,
    saves the pickle to local and remote storage and updates the DB record.
    Returns the error message string on failure, None on success.
    """
    original_name = name
    # Internal storage name is company-scoped.
    name = f'{company_id}@@@@@{name}'

    fs_store = FsStore()
    config = Config()
    data_store = DataStoreWrapper(DataStore(), company_id)

    # FIX: initialise before the try so the except block never hits a
    # NameError when the very first statement raises.
    predictor_record = None
    try:
        predictor_record = Predictor.query.filter_by(
            company_id=company_id, name=original_name).first()
        assert predictor_record is not None

        predictor_record.update_status = 'updating'
        session.commit()

        ds = data_store.get_datasource_obj(None, raw=False, id=predictor_record.datasource_id)
        df = ds.df

        problem_definition = predictor_record.learn_args
        problem_definition['target'] = predictor_record.to_predict[0]

        if 'join_learn_process' in problem_definition:
            del problem_definition['join_learn_process']

        # Adapt kwargs to problem definition.
        # (The old self-assignment of 'timeseries_settings' was a no-op and
        # has been removed.)
        if 'stop_training_in_x_seconds' in problem_definition:
            problem_definition['time_aim'] = problem_definition['stop_training_in_x_seconds']

        json_ai = lightwood.json_ai_from_problem(df, problem_definition)
        predictor_record.json_ai = json_ai.to_dict()
        predictor_record.code = lightwood.code_from_json_ai(json_ai)
        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)
        fs_store.put(fs_name, fs_name, config['paths']['predictors'])
        predictor_record.data = predictor.model_analysis.to_dict()  # type: ignore
        session.commit()

        predictor_record.lightwood_version = lightwood.__version__
        predictor_record.mindsdb_version = mindsdb_version
        predictor_record.update_status = 'up_to_date'
        session.commit()
    except Exception as e:
        log.error(e)
        # FIX: the record may be None (assert failed) — guard before writing.
        if predictor_record is not None:
            predictor_record.update_status = 'update_failed'  # type: ignore
            session.commit()
        return str(e)
def __init__(self, config):
    """Bind the MongoDB-protocol server socket and wire up opcode responders.

    Builds the shared `mindsdb_env` handed to request handlers and registers
    canned responses for the administrative commands a mongo client/shell
    issues on connect.
    """
    mongodb_config = config['api'].get('mongodb')
    assert mongodb_config is not None, 'is no mongodb config!'
    host = mongodb_config['host']
    port = mongodb_config['port']
    print(f'start mongo server on {host}:{port}')

    super().__init__((host, int(port)), MongoRequestHandler)

    # Shared context available to every request handler.
    self.mindsdb_env = {
        'config': config,
        'data_store': DataStore(config),
        'mindsdb_native': MindsdbNative(config)
    }

    respondersCollection = RespondersCollection()

    opQueryResponder = OpQueryResponder(respondersCollection)
    opMsgResponder = OpMsgResponder(respondersCollection)
    opInsertResponder = OpInsertResponder(respondersCollection)

    # Dispatch table: wire-protocol opcode -> responder.
    self.operationsHandlersMap = {
        OP_QUERY: opQueryResponder,
        OP_MSG: opMsgResponder,
        OP_INSERT: opInsertResponder
    }

    # Canned {'ok': 1} replies for admin commands issued by mongo clients.
    respondersCollection.add(
        when={'drop': 'system.sessions'},
        result={'ok': 1}
    )
    respondersCollection.add(
        when={'update': 'system.version'},
        result={'ok': 1}
    )
    respondersCollection.add(
        when={'setFeatureCompatibilityVersion': helpers.is_true},
        result={'ok': 1}
    )
    # Example of the OP_MSG this matches:
    # OpMSG=OrderedDict([('features', 1), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599748325, 1)), ('signature', OrderedDict([('hash', b'\xb8\xc3\x03\x18\xca\xe6bh\xf0\xcb47,\x924\x8a >\xfc\x91'), ('keyId', 6870854312365391875)]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599748325, 1)), ('t', 1)]))])), ('$db', 'admin')])
    respondersCollection.add(
        when={'features': helpers.is_true},
        result={'ok': 1}
    )
    # Example of the OP_MSG this matches:
    # OpMSG=OrderedDict([('serverStatus', 1), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599748366, 1)), ('signature', OrderedDict([('hash', b'\xa1E}\xbbIU\xc2D\x95++\x82\x88\xb5\x84\xf5\xda)+B'), ('keyId', 6870854312365391875)]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599748366, 1)), ('t', 1)]))])), ('$db', 'admin')])
    respondersCollection.add(
        when={'serverStatus': helpers.is_true},
        result={'ok': 1}
    )
    # Example 'ismaster' OP_MSG from a mongo shell:
    # OpMSG=OrderedDict([('ismaster', 1), ('$db', 'admin'), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599749031, 1)), ('signature', OrderedDict([('hash', b'6\x87\xd5Y\xa7\xc7\xcf$\xab\x1e\xa2{\xe5B\xe5\x99\xdbl\x8d\xf4'), ('keyId', 6870854312365391875)]))])), ('$client', OrderedDict([('application', OrderedDict([('name', 'MongoDB Shell')])), ('driver', OrderedDict([('name', 'MongoDB Internal Client'), ('version', '3.6.3')])), ('os', OrderedDict([('type', 'Linux'), ('name', 'Ubuntu'), ('architecture', 'x86_64'), ('version', '18.04')])), ('mongos', OrderedDict([('host', 'maxs-comp:27103'), ('client', '127.0.0.1:52148'), ('version', '3.6.3')]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599749031, 1)), ('t', 1)]))]))])

    # Module-level responders for the remaining OP_MSG commands.
    respondersCollection.responders += op_msg_responders
def run_environment(config, apis=None, override_integration_config=None, override_api_config=None, mindsdb_database='mindsdb', clear_storage=True):
    """Launch a MindsDB process with the requested APIs and wait for their ports.

    Returns (mdb, datastore) on success; raises Exception when any API port
    does not open within the timeout. Defaults are backward compatible:
    apis=None means ['mysql'], the override dicts default to empty.
    """
    # FIX: avoid mutable default arguments ([] / {}) shared across calls.
    apis = ['mysql'] if apis is None else apis
    override_integration_config = {} if override_integration_config is None else override_integration_config
    override_api_config = {} if override_api_config is None else override_api_config

    temp_config_path = prepare_config(config, mindsdb_database,
                                      override_integration_config,
                                      override_api_config, clear_storage)
    config = Config(temp_config_path)

    api_str = ','.join(apis)
    sp = subprocess.Popen(
        ['python3', '-m', 'mindsdb', '--api', api_str, '--config', temp_config_path, '--verbose'],
        close_fds=True,
        stdout=OUTPUT,
        stderr=OUTPUT
    )
    atexit.register(stop_mindsdb, sp=sp)

    async def wait_port_async(port, timeout):
        # Poll the port until it opens or the timeout elapses.
        start_time = time.time()
        started = is_port_in_use(port)
        while (time.time() - start_time) < timeout and started is False:
            await asyncio.sleep(1)
            started = is_port_in_use(port)
        return started

    async def wait_apis_start(ports):
        futures = [wait_port_async(port, 60) for port in ports]
        success = True
        for future in asyncio.as_completed(futures):
            # FIX: always await every future. The old `success and await future`
            # short-circuited after the first failure and left coroutines
            # unawaited.
            started = await future
            success = success and started
        return success

    ports_to_wait = [config['api'][api]['port'] for api in apis]

    ioloop = asyncio.get_event_loop()
    if ioloop.is_closed():
        ioloop = asyncio.new_event_loop()
    success = ioloop.run_until_complete(wait_apis_start(ports_to_wait))
    ioloop.close()
    if not success:
        raise Exception('Cant start mindsdb apis')

    CONFIG.MINDSDB_STORAGE_PATH = config.paths['predictors']
    mdb = NativeInterface(config)
    datastore = DataStore(config)
    return mdb, datastore
def run_environment(db, config, run_apis='mysql'):
    """Start the `db` test container (unless mssql/mongodb) and MindsDB APIs.

    `run_apis` may be a comma-separated string or a list. Returns
    (mdb, datastore) on success; raises Exception with a diagnostic message
    on timeout.
    """
    DEFAULT_DB = f'default_{db}'
    temp_config_path = prepare_config(config, DEFAULT_DB)

    # mssql/mongodb are assumed to be externally provided, not containerised here.
    if db in ['mssql', 'mongodb']:
        db_ready = True
    else:
        if is_container_run(f'{db}-test') is False:
            run_container(db)
        db_ready = wait_db(config, DEFAULT_DB)

    if isinstance(run_apis, list) is False:
        run_apis = run_apis.split(',')
    api_str = ','.join(run_apis)

    if db_ready:
        sp = subprocess.Popen(
            ['python3', '-m', 'mindsdb', '--api', api_str, '--config', temp_config_path],
            stdout=OUTPUT,
            stderr=OUTPUT
        )
        atexit.register(stop_mindsdb, sp=sp)

    api_ready = True
    for api in run_apis:
        # FIX: removed the identity no-op
        # `apistr = 'mongodb' if api == 'mongodb' else api`.
        api_ready = api_ready and wait_api_ready(config, api)
        if api_ready is False:
            break

    if db_ready is False or api_ready is False:
        # Carry the diagnostic in the exception instead of a bare Exception().
        msg = f'Failed by timeout. {db} started={db_ready}, MindsDB started={api_ready}'
        print(msg)
        raise Exception(msg)

    CONFIG.MINDSDB_STORAGE_PATH = config.paths['predictors']
    mdb = MindsdbNative(config)
    datastore = DataStore(config)
    return mdb, datastore
def update_model(self, name):
    """Update (retrain) the named predictor via the worker updater.

    Marks the DB record 'updating', runs the update, then refreshes the
    record status. Returns the error message string on failure, None on
    success.
    """
    # Local imports kept as in the original to avoid import cycles at module load.
    from mindsdb_native import F
    from mindsdb_worker.updater.update_model import update_model
    from mindsdb.interfaces.storage.db import session, Predictor
    from mindsdb.interfaces.datastore.datastore import DataStore

    # FIX: initialise before the try so the except block never dereferences
    # an unassigned (or None) predictor_record.
    predictor_record = None
    try:
        predictor_record = Predictor.query.filter_by(
            company_id=self.company_id, name=name, is_custom=False).first()
        predictor_record.update_status = 'updating'
        session.commit()
        update_model(name, self.delete_model, F.delete_model, self.learn,
                     self._lock_context, self.company_id,
                     self.config['paths']['predictors'], predictor_record,
                     self.fs_store, DataStore())

        predictor_record = self._update_db_status(predictor_record)
    except Exception as e:
        log.error(e)
        if predictor_record is not None:
            predictor_record.update_status = 'update_failed'
            session.commit()
        return str(e)
def __init__(self, config, integration_name):
    """Bind this node to one integration and keep a default DataStore."""
    self.config = config
    # Name of the integration this data node proxies queries to.
    self.integration_name = integration_name
    # DataStore used for temporary query-backed datasources.
    self.default_store = DataStore()
def __init__(self, config):
    """Keep the raw config and a config-bound DataStore for datasource access."""
    self.config = config
    self.datastore = DataStore(config)
def initialize_interfaces(app):
    """Attach the shared data-store, model-interface and config objects to the app."""
    app.original_data_store = DataStore()
    app.original_model_interface = ModelInterface()
    app.config_obj = Config()
class IntegrationDataNode(DataNode):
    """Data node that proxies SELECT queries to an external integration.

    Queries run by materialising a temporary datasource in the default
    DataStore; datetime columns are converted to POSIX timestamps before
    the rows are returned.
    """

    type = 'integration'

    def __init__(self, config, integration_name):
        self.config = config
        self.integration_name = integration_name
        # DataStore used to create/drop the short-lived query datasources.
        self.default_store = DataStore()

    def getType(self):
        return self.type

    def getTables(self):
        # Integration tables are not enumerated here.
        return []

    def hasTable(self, tableName):
        # Optimistically claim every table; the integration resolves it at query time.
        return True

    def getTableColumns(self, tableName):
        # Columns are not known up-front for integration tables.
        return []

    def select(self, table=None, columns=None, where=None, where_data=None, order_by=None, group_by=None, came_from=None):
        """Run a SELECT against the integration and return a list of row dicts.

        `where` may arrive as a single condition dict or a list of them; it is
        normalised to {'and': [...]} and only included when non-empty.
        """
        # Remember whether a real filter was supplied before `where` is mutated.
        has_where = isinstance(where, (dict, list)) and len(where) > 0
        if isinstance(where, dict):
            where = [where]
        if isinstance(where, list):
            for el in where:
                if isinstance(el, dict):
                    for key in el:
                        # Strip a leading "table." qualifier from string operands
                        # like ['tbl.col', ...] so the integration sees bare names.
                        if isinstance(el[key], list) and len(el[key]) > 0 and isinstance(el[key][0], str) and '.' in el[key][0]:
                            el[key][0] = el[key][0][el[key][0].find('.') + 1:]
            where = {'and': where}

        format_data = {'from': table, 'select': columns}
        if has_where:
            format_data['where'] = where
        # `format` here is presumably a SQL-rendering helper imported elsewhere
        # in this module (not the builtin) — TODO confirm against the imports.
        query = format(format_data)

        # Unique temp name; time.time()*100 gives centisecond resolution.
        ds, ds_name = self.default_store.save_datasource(
            f'temp_ds_{int(time.time()*100)}', self.integration_name, {'query': query})
        dso = self.default_store.get_datasource_obj(ds_name)
        data = dso.df.to_dict(orient='records')

        # Convert datetime/timedelta columns to POSIX timestamps so rows are
        # JSON-friendly plain numbers.
        for column_name in dso.df.columns:
            if pd.core.dtypes.common.is_datetime_or_timedelta_dtype(dso.df[column_name]):
                pass_data = dso.df[column_name].dt.to_pydatetime()
                for i, rec in enumerate(data):
                    rec[column_name] = pass_data[i].timestamp()

        # Drop the temporary datasource before returning.
        self.default_store.delete_datasource(ds_name)
        return data
class MindsDBDataNode(DataNode):
    """Data node exposing MindsDB predictors (native, custom and AI-tables)
    as queryable tables, plus the virtual 'predictors' and 'commands' tables.
    """

    type = 'mindsdb'

    def __init__(self, config):
        # NOTE(review): the `config` argument is ignored; a fresh Config() is
        # constructed instead — confirm this is intentional.
        self.config = Config()
        self.mindsdb_native = NativeInterface()
        self.custom_models = CustomModels()
        self.ai_table = AITable_store()
        self.default_store = DataStore()

    def getTables(self):
        """All table names this node serves: completed native models,
        the virtual tables, custom models and AI-tables."""
        models = self.mindsdb_native.get_models()
        models = [x['name'] for x in models if x['status'] == 'complete']
        models += ['predictors', 'commands']
        models += [x['name'] for x in self.custom_models.get_models()]

        ai_tables = self.ai_table.get_ai_tables()
        models += [x['name'] for x in ai_tables]

        return models

    def hasTable(self, table):
        return table in self.getTables()

    def _get_ai_table_columns(self, table_name):
        # AI-table columns = its query fields plus its predictor output columns.
        aitable_record = self.ai_table.get_ai_table(table_name)
        columns = (
            [x['name'] for x in aitable_record.query_fields]
            + [x['name'] for x in aitable_record.predictor_columns]
        )
        return columns

    def _get_model_columns(self, table_name):
        """Columns of a native model table: the model's own columns plus the
        derived *_original/*_confidence/*_explain (and *_min/*_max for
        numeric targets) columns."""
        model = self.mindsdb_native.get_model_data(name=table_name)

        columns = []
        columns += model['columns']
        columns += [f'{x}_original' for x in model['predict']]
        for col in model['predict']:
            if model['data_analysis_v2'][col]['typing']['data_type'] == 'Numeric':
                columns += [f"{col}_min", f"{col}_max"]
            columns += [f"{col}_confidence"]
            columns += [f"{col}_explain"]

        return columns

    def getTableColumns(self, table):
        """Column names for any table this node serves."""
        # Custom models are checked first; get_model_data raises when unknown.
        try:
            columns = self.custom_models.get_model_data(table)['columns']
            columns += ['external_datasource', 'select_data_query', 'when_data']
            return columns
        except Exception:
            pass

        if table == 'predictors':
            return ['name', 'status', 'accuracy', 'predict', 'select_data_query', 'external_datasource', 'training_options']
        if table == 'commands':
            return ['command']

        columns = []

        ai_tables = self.ai_table.get_ai_table(table)
        if ai_tables is not None:
            columns = self._get_ai_table_columns(table)
        elif table in [x['name'] for x in self.mindsdb_native.get_models()]:
            columns = self._get_model_columns(table)
            columns += ['when_data', 'select_data_query', 'external_datasource']

        return columns

    def _select_predictors(self):
        """Rows for the virtual 'predictors' table."""
        models = self.mindsdb_native.get_models()
        # TODO add custom models
        return [{
            'name': x['name'],
            'status': x['status'],
            'accuracy': str(x['accuracy']) if x['accuracy'] is not None else None,
            'predict': ', '.join(x['predict']),
            'select_data_query': '',
            'external_datasource': '',  # TODO
            'training_options': ''  # TODO ?
        } for x in models]

    def delete_predictor(self, name):
        self.mindsdb_native.delete_model(name)

    def _select_from_ai_table(self, table, columns, where):
        """Run an AI-table: execute its stored integration query, predict on
        the result, and remap output keys back to the declared column names."""
        aitable_record = self.ai_table.get_ai_table(table)
        integration = aitable_record.integration_name
        query = aitable_record.integration_query
        predictor_name = aitable_record.predictor_name

        ds, ds_name = self.default_store.save_datasource('temp_ds', integration, {'query': query})
        dso = self.default_store.get_datasource_obj(ds_name)
        res = self.mindsdb_native.predict(name=predictor_name, when_data=dso)
        self.default_store.delete_datasource(ds_name)

        # Map predictor output keys / query fields to the AI-table column names.
        keys_map = {}
        for f in aitable_record.predictor_columns:
            keys_map[f['value']] = f['name']
        for f in aitable_record.query_fields:
            keys_map[f['name']] = f['name']
        keys = list(keys_map.keys())

        data = []
        for i, el in enumerate(res):
            data.append({keys_map[key]: el[key] for key in keys})

        return data

    def select(self, table, columns=None, where=None, where_data=None, order_by=None, group_by=None, came_from=None):
        ''' NOTE WHERE statements can be just $eq joined with 'and' '''
        if table == 'predictors':
            return self._select_predictors()
        if table == 'commands':
            return []
        if self.ai_table.get_ai_table(table):
            return self._select_from_ai_table(table, columns, where)

        # 'when_data' carries inline JSON rows and must be the only condition.
        original_when_data = None
        if 'when_data' in where:
            if len(where) > 1:
                raise ValueError("Should not be used any other keys in 'where', if 'when_data' used")
            try:
                original_when_data = where['when_data']['$eq']
                where_data = json.loads(where['when_data']['$eq'])
                if isinstance(where_data, list) is False:
                    where_data = [where_data]
            except Exception:
                raise ValueError(f'''Error while parse
'when_data'="{where_data}"''')

        external_datasource = None
        if 'external_datasource' in where:
            external_datasource = where['external_datasource']['$eq']
            del where['external_datasource']

        # 'select_data_query' runs a raw SQL query against the integration the
        # request came from; its rows are appended to where_data.
        select_data_query = None
        if came_from is not None and 'select_data_query' in where:
            select_data_query = where['select_data_query']['$eq']
            del where['select_data_query']

            dbtype = self.config['integrations'][came_from]['type']
            if dbtype == 'clickhouse':
                ch = Clickhouse(self.config, came_from)
                res = ch._query(select_data_query.strip(' ;\n') + ' FORMAT JSON')
                data = res.json()['data']
            elif dbtype == 'mariadb':
                maria = Mariadb(self.config, came_from)
                data = maria._query(select_data_query)
            elif dbtype == 'mysql':
                mysql = MySQL(self.config, came_from)
                data = mysql._query(select_data_query)
            elif dbtype == 'postgres':
                mysql = PostgreSQL(self.config, came_from)
                data = mysql._query(select_data_query)
            elif dbtype == 'mssql':
                mssql = MSSQL(self.config, came_from)
                data = mssql._query(select_data_query, fetch=True)
            else:
                raise Exception(f'Unknown database type: {dbtype}')

            if where_data is None:
                where_data = data
            else:
                where_data += data

        # Without explicit rows, build a single row from the $eq conditions.
        new_where = {}
        if where_data is not None:
            where_data = pandas.DataFrame(where_data)
        else:
            for key, value in where.items():
                if isinstance(value, dict) is False or len(value.keys()) != 1 or list(value.keys())[0] != '$eq':
                    # TODO value should be just string or number
                    raise Exception()
                new_where[key] = value['$eq']

            if len(new_where) == 0:
                return []

            where_data = [new_where]

        # Custom models and native models share this path; custom lookup
        # raises for unknown names, so fall back to native.
        try:
            model = self.custom_models.get_model_data(name=table)
        except Exception:
            model = self.mindsdb_native.get_model_data(name=table)

        predicted_columns = model['predict']

        # Preserve the caller-supplied target values as '<col>_original'.
        original_target_values = {}
        for col in predicted_columns:
            if where_data is not None:
                if col in where_data:
                    original_target_values[col + '_original'] = list(where_data[col])
                else:
                    original_target_values[col + '_original'] = [None] * len(where_data)
            else:
                original_target_values[col + '_original'] = [None]

        if table in [x['name'] for x in self.custom_models.get_models()]:
            # --- custom model branch ---
            res = self.custom_models.predict(name=table, when_data=where_data)

            data = []
            fields = model['columns']
            for i, ele in enumerate(res):
                row = {}
                row['select_data_query'] = select_data_query
                row['external_datasource'] = external_datasource
                row['when_data'] = original_when_data

                for key in ele:
                    row[key] = ele[key]['predicted_value']
                    # FIXME prefer get int from mindsdb_native in this case
                    if model['data_analysis_v2'][key]['typing']['data_subtype'] == 'Int':
                        row[key] = int(row[key])

                # Fill in non-predicted fields from the input rows.
                for k in fields:
                    if k not in ele:
                        if isinstance(where_data, list):
                            if k in where_data[i]:
                                row[k] = where_data[i][k]
                            else:
                                row[k] = None
                        elif k in where_data.columns:
                            row[k] = where_data[k].iloc[i]
                        else:
                            row[k] = None

                for k in original_target_values:
                    row[k] = original_target_values[k][i]

                data.append(row)

            field_types = {
                f: model['data_analysis_v2'][f]['typing']['data_subtype']
                for f in fields if 'typing' in model['data_analysis_v2'][f]
            }
            for row in data:
                cast_row_types(row, field_types)

            return data
        else:
            # --- native model branch ---
            res = self.mindsdb_native.predict(name=table, when_data=where_data)

            # Only return keys the caller asked for.
            keys = [x for x in list(res._data.keys()) if x in columns]
            min_max_keys = []
            for col in predicted_columns:
                if model['data_analysis_v2'][col]['typing']['data_type'] == 'Numeric':
                    min_max_keys.append(col)

            data = []
            explains = []
            for i, el in enumerate(res):
                data.append({key: el[key] for key in keys})
                explains.append(el.explain())

            field_types = {
                f: model['data_analysis_v2'][f]['typing']['data_subtype']
                for f in model['columns'] if 'typing' in model['data_analysis_v2'][f]
            }

            for i, row in enumerate(data):
                cast_row_types(row, field_types)

                row['select_data_query'] = select_data_query
                row['external_datasource'] = external_datasource
                row['when_data'] = original_when_data

                for k in original_target_values:
                    row[k] = original_target_values[k][i]

                explanation = explains[i]
                for key in predicted_columns:
                    row[key + '_confidence'] = explanation[key]['confidence']
                    row[key + '_explain'] = json.dumps(explanation[key], cls=NumpyJSONEncoder, ensure_ascii=False)
                for key in min_max_keys:
                    row[key + '_min'] = min(explanation[key]['confidence_interval'])
                    row[key + '_max'] = max(explanation[key]['confidence_interval'])

            return data
def __init__(self, config):
    """Construct the service objects this data node uses."""
    # NOTE(review): the `config` argument is ignored; a fresh Config() is
    # constructed instead — confirm this is intentional before relying on it.
    self.config = Config()
    self.mindsdb_native = NativeInterface()
    self.custom_models = CustomModels()
    self.ai_table = AITable_store()
    self.default_store = DataStore()
def initialize_interfaces(config, app):
    """Build the config-bound service objects and attach them to the app."""
    services = {
        'default_store': DataStore,
        'mindsdb_native': NativeInterface,
        'custom_models': CustomModels,
        'dbw': DatabaseWrapper,
    }
    for attr, factory in services.items():
        setattr(app, attr, factory(config))
    app.config_obj = config
def initialize_interfaces(config, app):
    """Attach a config-bound DataStore and native interface to the app."""
    for attr, factory in (('default_store', DataStore), ('mindsdb_native', MindsdbNative)):
        setattr(app, attr, factory(config))
    app.config_obj = config
class StreamController:
    """Runs a prediction loop over an input stream for one predictor.

    Rows read from `stream_in` are fed to the model; predictions go to
    `stream_out`, anomalous ones to `anomaly_stream` when provided. When a
    `learning_stream` is set, incoming records are buffered and, once
    `learning_threshold` is reached, used to adjust the predictor.
    """

    def __init__(self, name, predictor, stream_in, stream_out, anomaly_stream=None, learning_stream=None, learning_threshold=100):
        """Validate the predictor exists and start the worker thread
        (timeseries or plain, depending on the predictor's settings)."""
        self.name = name
        self.predictor = predictor
        self.stream_in = stream_in
        self.stream_out = stream_out
        self.anomaly_stream = anomaly_stream

        self.learning_stream = learning_stream
        self.learning_threshold = learning_threshold
        self.learning_data = []

        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.stop_event = Event()
        self.model_interface = ModelInterfaceWrapper(ModelInterface())
        self.data_store = DataStore()
        self.config = Config()

        p = db.session.query(db.Predictor).filter_by(
            company_id=self.company_id, name=self.predictor).first()
        if p is None:
            raise Exception(f'Predictor {predictor} doesn\'t exist')

        self.target = p.to_predict[0]

        ts_settings = p.learn_args.get('timeseries_settings', None)
        # FIX: the old code subscripted ts_settings['is_timeseries'] directly,
        # raising TypeError when learn_args has no 'timeseries_settings' key.
        if ts_settings is not None and not ts_settings.get('is_timeseries'):
            ts_settings = None

        if ts_settings is None:
            self.thread = Thread(target=StreamController._make_predictions, args=(self,))
        else:
            self.ts_settings = ts_settings
            self.thread = Thread(target=StreamController._make_ts_predictions, args=(self,))
        self.thread.start()

    def _is_anomaly(self, res):
        """True when any '*_anomaly' field of a prediction is set."""
        for k in res:
            if k.endswith('_anomaly') and res[k] is not None:
                return True
        return False

    def _consider_learning(self):
        """Buffer learning-stream records; once the threshold is reached,
        persist them as a file datasource and adjust the predictor."""
        if self.learning_stream is not None:
            self.learning_data.extend(self.learning_stream.read())
            if len(self.learning_data) >= self.learning_threshold:
                p = db.session.query(db.Predictor).filter_by(
                    company_id=self.company_id, name=self.predictor).first()
                # NOTE(review): ds_record is queried but never used below —
                # kept to preserve behavior; confirm whether it can be removed.
                ds_record = db.session.query(db.Datasource).filter_by(id=p.datasource_id).first()

                df = pd.DataFrame.from_records(self.learning_data)
                name = 'name_' + str(time()).replace('.', '_')
                path = os.path.join(self.config['paths']['datasources'], name)
                df.to_csv(path)

                from_data = {
                    'class': 'FileDS',
                    'args': [path],
                    'kwargs': {},
                }

                self.data_store.save_datasource(name=name, source_type='file',
                                                source=path, file_path=path,
                                                company_id=self.company_id)
                ds = self.data_store.get_datasource(name, self.company_id)

                self.model_interface.adjust(p.name, from_data, ds['id'], self.company_id)
                self.learning_data.clear()

    def _make_predictions(self):
        """Plain (non-timeseries) loop: predict per incoming record."""
        while not self.stop_event.wait(0.5):
            self._consider_learning()
            for when_data in self.stream_in.read():
                preds = self.model_interface.predict(self.predictor, when_data, 'dict')
                for res in preds:
                    if self.anomaly_stream is not None and self._is_anomaly(res):
                        self.anomaly_stream.write(res)
                    else:
                        self.stream_out.write(res)

    def _make_ts_predictions(self):
        """Timeseries loop: accumulate records per group until a full window
        is available, then predict on sliding windows."""
        window = self.ts_settings['window']

        order_by = self.ts_settings['order_by']
        order_by = [order_by] if isinstance(order_by, str) else order_by

        group_by = self.ts_settings.get('group_by', None)
        group_by = [group_by] if isinstance(group_by, str) else group_by

        cache = Cache(self.name)
        while not self.stop_event.wait(0.5):
            self._consider_learning()
            for when_data in self.stream_in.read():
                for ob in order_by:
                    if ob not in when_data:
                        raise Exception(f'when_data doesn\'t contain order_by[{ob}]')

                # FIX: guard None — the old code iterated group_by
                # unconditionally and raised TypeError when the predictor has
                # no group_by setting (later code already handled None).
                if group_by is not None:
                    for gb in group_by:
                        if gb not in when_data:
                            raise Exception(f'when_data doesn\'t contain group_by[{gb}]')

                gb_value = tuple(when_data[gb] for gb in group_by) if group_by is not None else ''

                # because cache doesn't work for tuples
                # (raises Exception: tuple doesn't have "encode" attribute)
                gb_value = str(gb_value)

                with cache:
                    if gb_value not in cache:
                        cache[gb_value] = []

                    # do this because shelve-cache doesn't support
                    # in-place changing
                    records = cache[gb_value]
                    records.append(when_data)
                    cache[gb_value] = records

            with cache:
                for gb_value in cache.keys():
                    if len(cache[gb_value]) >= window:
                        cache[gb_value] = [*sorted(
                            cache[gb_value],
                            # WARNING: assuming wd[ob] is numeric
                            key=lambda wd: tuple(wd[ob] for ob in order_by))]

                        while len(cache[gb_value]) >= window:
                            res_list = self.model_interface.predict(
                                self.predictor, cache[gb_value][:window], 'dict')
                            if self.anomaly_stream is not None and self._is_anomaly(res_list[-1]):
                                self.anomaly_stream.write(res_list[-1])
                            else:
                                self.stream_out.write(res_list[-1])
                            # Slide the window forward by one record.
                            cache[gb_value] = cache[gb_value][1:]