Example 1
class DataSourceDataNode(DataNode):
    type = 'mindsdb-datasource'

    def __init__(self, config):
        self.config = config
        self.datastore = DataStore(config)
        # self.mindsdb_native = MindsdbNative(config)

    def getTables(self):
        dss = self.datastore.get_datasources()
        return [x['name'] for x in dss]

    def hasTable(self, table):
        return table in self.getTables()

    def getTableColumns(self, table):
        ds = self.datastore.get_datasource(table)
        return [x['name'] for x in ds['columns']]

    def select(self,
               table,
               columns=None,
               where=None,
               where_data=None,
               order_by=None,
               group_by=None,
               came_from=None):
        # NOTE: the columns/where/order/group arguments are accepted for
        # interface compatibility but are not applied here; the full
        # datasource contents are returned as-is.
        data = self.datastore.get_data(table,
                                       where=None,
                                       limit=None,
                                       offset=None)
        return data['data']
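
The snippet above is simple enough to exercise with a fake store. A minimal sketch, assuming the class above is in scope; FakeDataStore and its sample data are illustrative stand-ins, not MindsDB's DataStore API:

# Illustrative stand-in for DataStore; not MindsDB's actual API.
class FakeDataStore:
    def __init__(self):
        self._sources = {'sales': {
            'columns': [{'name': 'id'}, {'name': 'amount'}],
            'data': [{'id': 1, 'amount': 9.5}],
        }}

    def get_datasources(self):
        return [{'name': name} for name in self._sources]

    def get_datasource(self, name):
        return self._sources[name]

    def get_data(self, name, where=None, limit=None, offset=None):
        return {'data': self._sources[name]['data']}


class DemoNode(DataSourceDataNode):
    def __init__(self, datastore):  # skip the DataStore(config) wiring
        self.datastore = datastore


node = DemoNode(FakeDataStore())
print(node.getTables())               # ['sales']
print(node.hasTable('sales'))         # True
print(node.getTableColumns('sales'))  # ['id', 'amount']
print(node.select('sales'))           # [{'id': 1, 'amount': 9.5}]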
Example 2
class IntegrationDataNode(DataNode):
    type = 'integration'

    def __init__(self, config, integration_name):
        self.config = config
        self.integration_name = integration_name
        self.default_store = DataStore()

    def getType(self):
        return self.type

    def getTables(self):
        return []

    def hasTable(self, tableName):
        return True

    def getTableColumns(self, tableName):
        return []

    def select(self,
               table=None,
               columns=None,
               where=None,
               where_data=None,
               order_by=None,
               group_by=None,
               came_from=None):
        if isinstance(where, dict):
            where = [where]

        if isinstance(where, list):
            for el in where:
                if isinstance(el, dict):
                    for key in el:
                        if (isinstance(el[key], list) and len(el[key]) > 0
                                and isinstance(el[key][0], str)
                                and '.' in el[key][0]):
                            # strip the table prefix: 'tbl.col' -> 'col'
                            el[key][0] = el[key][0][el[key][0].find('.') + 1:]
            where = {'and': where}

        query = format({"from": table, 'select': columns, "where": where})

        ds, ds_name = self.default_store.save_datasource(
            f'temp_ds_{int(time.time()*100)}', self.integration_name,
            {'query': query})
        dso = self.default_store.get_datasource_obj(ds_name)
        data = dso.df.T.to_dict().values()
        self.default_store.delete_datasource(ds_name)

        return data
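
The interesting part of select() is the WHERE normalization: a single dict becomes a list, table prefixes are stripped from column names, and the conditions are wrapped in an 'and'. A standalone copy of just that transform, for illustration:

# Standalone copy of the where-normalization above.
def normalize_where(where):
    if isinstance(where, dict):
        where = [where]
    if isinstance(where, list):
        for el in where:
            if isinstance(el, dict):
                for key in el:
                    val = el[key]
                    if (isinstance(val, list) and len(val) > 0
                            and isinstance(val[0], str) and '.' in val[0]):
                        # strip the table prefix: 'tbl.col' -> 'col'
                        val[0] = val[0][val[0].find('.') + 1:]
        where = {'and': where}
    return where

print(normalize_where({'eq': ['sales.amount', 10]}))
# {'and': [{'eq': ['amount', 10]}]}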
Example 3
def run_environment(db, config):
    DEFAULT_DB = f'default_{db}'

    temp_config_path = prepare_config(config, DEFAULT_DB)

    if is_container_run(f'{db}-test') is False:
        subprocess.Popen(['./cli.sh', db],
                         cwd=TESTS_ROOT.joinpath('docker/').resolve(),
                         stdout=OUTPUT,
                         stderr=OUTPUT)
        atexit.register(stop_container, name=db)
    db_ready = wait_db(config, DEFAULT_DB)

    if db_ready:
        sp = subprocess.Popen([
            'python3', '-m', 'mindsdb', '--api', 'mysql', '--config',
            temp_config_path
        ],
                              stdout=OUTPUT,
                              stderr=OUTPUT)
        atexit.register(stop_mindsdb, sp=sp)

    api_ready = db_ready and wait_api_ready(config)

    if db_ready is False or api_ready is False:
        print(
            f'Failed by timeout. {db} started={db_ready}, MindsDB started={api_ready}'
        )
        raise Exception('Failed by timeout')

    mdb = MindsdbNative(config)
    datastore = DataStore(config)

    return mdb, datastore
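
The launch-and-cleanup pattern here (subprocess.Popen plus atexit.register) is worth isolating. A minimal runnable sketch; the sleep command is just a placeholder process and assumes a Unix-like system:

import atexit
import subprocess

def stop_process(sp):
    sp.terminate()
    sp.wait()

# placeholder long-running process standing in for the db container / MindsDB
sp = subprocess.Popen(['sleep', '60'])
atexit.register(stop_process, sp=sp)  # teardown runs even on normal exit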
Example 4
def initialize_interfaces(app):
    app.default_store = DataStore()
    app.native_interface = NativeInterface()
    app.custom_models = CustomModels()
    app.dbw = DatabaseWrapper()
    config = Config()
    app.config_obj = config
Example 5
    def startProxy(config):
        global HARDCODED_USER
        global HARDCODED_PASSWORD
        global CERT_PATH
        global default_store
        global mdb
        global datahub
        """
        Create a server and wait for incoming connections until Ctrl-C
        """
        init_logger(config)

        HARDCODED_USER = config['api']['mysql']['user']
        HARDCODED_PASSWORD = config['api']['mysql']['password']
        CERT_PATH = config['api']['mysql']['certificate_path']
        default_store = DataStore(config)
        mdb = MindsdbNative(config)
        datahub = init_datahub(config)

        host = config['api']['mysql']['host']
        port = int(config['api']['mysql']['port'])

        log.info(f'Starting MindsDB MySQL proxy server on tcp://{host}:{port}')

        # Create the server
        if config['debug'] is True:
            SocketServer.TCPServer.allow_reuse_address = True
        server = SocketServer.ThreadingTCPServer((host, port), MysqlProxy)

        atexit.register(MysqlProxy.server_close, srv=server)

        # Activate the server; this will keep running until you
        # interrupt the program with Ctrl-C
        log.info('Waiting for incoming connections...')
        server.serve_forever()
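
For context, the same server setup works with any ThreadingTCPServer handler. A minimal sketch with an echo handler in place of MysqlProxy (host and port are placeholders):

import socketserver

class EchoHandler(socketserver.BaseRequestHandler):
    def handle(self):
        # echo one chunk back to the client, then close
        data = self.request.recv(1024)
        self.request.sendall(data)

socketserver.TCPServer.allow_reuse_address = True
server = socketserver.ThreadingTCPServer(('127.0.0.1', 9999), EchoHandler)
try:
    server.serve_forever()
except KeyboardInterrupt:
    server.server_close()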
Example 6
    def delete_model(self, name, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        db_p = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        if db_p is None:
            raise Exception(f"Predictor '{name}' does not exist")
        db.session.delete(db_p)
        if db_p.datasource_id is not None:
            try:
                dataset_record = db.Datasource.query.get(db_p.datasource_id)
                if (isinstance(dataset_record.data, str) and json.loads(
                        dataset_record.data).get('source_type') != 'file'):
                    DataStore().delete_datasource(dataset_record.name,
                                                  company_id)
            except Exception:
                pass
        db.session.commit()

        DatabaseWrapper(company_id).unregister_predictor(name)

        # delete from s3
        self.fs_store.delete(f'predictor_{company_id}_{db_p.id}')

        return 0
Example 7
    def __init__(self,
                 name,
                 predictor,
                 stream_in,
                 stream_out,
                 anomaly_stream=None,
                 learning_stream=None,
                 learning_threshold=100):
        self.name = name
        self.predictor = predictor

        self.stream_in = stream_in
        self.stream_out = stream_out
        self.anomaly_stream = anomaly_stream

        self.learning_stream = learning_stream
        self.learning_threshold = learning_threshold
        self.learning_data = []

        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.stop_event = Event()
        self.model_interface = ModelInterfaceWrapper(ModelInterface())
        self.data_store = DataStore()
        self.config = Config()

        p = db.session.query(db.Predictor).filter_by(
            company_id=self.company_id, name=self.predictor).first()
        if p is None:
            raise Exception(f'Predictor {predictor} doesn\'t exist')

        self.target = p.to_predict[0]

        ts_settings = p.learn_args.get('timeseries_settings', None)
        if ts_settings is not None and not ts_settings.get('is_timeseries'):
            ts_settings = None

        if ts_settings is None:
            self.thread = Thread(target=StreamController._make_predictions,
                                 args=(self, ))
        else:
            self.ts_settings = ts_settings
            self.thread = Thread(target=StreamController._make_ts_predictions,
                                 args=(self, ))

        self.thread.start()
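
The constructor's thread wiring follows a common stop-event pattern: the worker polls Event.wait() so it can exit promptly once the event is set. A stripped-down, runnable sketch of just that mechanism:

from threading import Event, Thread

class Worker:
    def __init__(self):
        self.stop_event = Event()
        self.thread = Thread(target=self._run)
        self.thread.start()

    def _run(self):
        # wait() doubles as the poll interval and the shutdown check
        while not self.stop_event.wait(0.5):
            pass  # read input streams and write predictions here

    def stop(self):
        self.stop_event.set()
        self.thread.join()

w = Worker()
w.stop()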
Example 8
def run_update(name: str, company_id: int):
    original_name = name
    name = f'{company_id}@@@@@{name}'

    fs_store = FsStore()
    config = Config()
    data_store = DataStoreWrapper(DataStore(), company_id)

    try:
        predictor_record = Predictor.query.filter_by(company_id=company_id, name=original_name).first()
        assert predictor_record is not None

        predictor_record.update_status = 'updating'

        session.commit()
        ds = data_store.get_datasource_obj(None, raw=False, id=predictor_record.datasource_id)
        df = ds.df

        problem_definition = predictor_record.learn_args

        problem_definition['target'] = predictor_record.to_predict[0]

        if 'join_learn_process' in problem_definition:
            del problem_definition['join_learn_process']

        # Adapt kwargs to problem definition
        # ('timeseries_settings' is already keyed correctly, so the
        # self-assignment below is effectively a no-op)
        if 'timeseries_settings' in problem_definition:
            problem_definition['timeseries_settings'] = problem_definition['timeseries_settings']

        if 'stop_training_in_x_seconds' in problem_definition:
            problem_definition['time_aim'] = problem_definition['stop_training_in_x_seconds']

        json_ai = lightwood.json_ai_from_problem(df, problem_definition)
        predictor_record.json_ai = json_ai.to_dict()
        predictor_record.code = lightwood.code_from_json_ai(json_ai)
        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)
        fs_store.put(fs_name, fs_name, config['paths']['predictors'])
        predictor_record.data = predictor.model_analysis.to_dict()  # type: ignore
        session.commit()

        predictor_record.lightwood_version = lightwood.__version__
        predictor_record.mindsdb_version = mindsdb_version
        predictor_record.update_status = 'up_to_date'
        session.commit()

    except Exception as e:
        log.error(e)
        predictor_record.update_status = 'update_failed'  # type: ignore
        session.commit()
        return str(e)
Example 9
    def __init__(self, config):
        mongodb_config = config['api'].get('mongodb')
        assert mongodb_config is not None, 'no mongodb config found!'
        host = mongodb_config['host']
        port = mongodb_config['port']
        print(f'Starting MongoDB server on {host}:{port}')

        super().__init__((host, int(port)), MongoRequestHandler)

        self.mindsdb_env = {
            'config': config,
            'data_store': DataStore(config),
            'mindsdb_native': MindsdbNative(config)
        }

        respondersCollection = RespondersCollection()

        opQueryResponder = OpQueryResponder(respondersCollection)
        opMsgResponder = OpMsgResponder(respondersCollection)
        opInsertResponder = OpInsertResponder(respondersCollection)

        self.operationsHandlersMap = {
            OP_QUERY: opQueryResponder,
            OP_MSG: opMsgResponder,
            OP_INSERT: opInsertResponder
        }

        respondersCollection.add(
            when={'drop': 'system.sessions'},
            result={'ok': 1}
        )
        respondersCollection.add(
            when={'update': 'system.version'},
            result={'ok': 1}
        )
        respondersCollection.add(
            when={'setFeatureCompatibilityVersion': helpers.is_true},
            result={'ok': 1}
        )
        # OpMSG=OrderedDict([('features', 1), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599748325, 1)), ('signature', OrderedDict([('hash', b'\xb8\xc3\x03\x18\xca\xe6bh\xf0\xcb47,\x924\x8a >\xfc\x91'), ('keyId', 6870854312365391875)]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599748325, 1)), ('t', 1)]))])), ('$db', 'admin')])
        respondersCollection.add(
            when={'features': helpers.is_true},
            result={'ok': 1}
        )
        # OpMSG=OrderedDict([('serverStatus', 1), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599748366, 1)), ('signature', OrderedDict([('hash', b'\xa1E}\xbbIU\xc2D\x95++\x82\x88\xb5\x84\xf5\xda)+B'), ('keyId', 6870854312365391875)]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599748366, 1)), ('t', 1)]))])), ('$db', 'admin')])
        respondersCollection.add(
            when={'serverStatus': helpers.is_true},
            result={'ok': 1}
        )
        # OpMSG=OrderedDict([('ismaster', 1), ('$db', 'admin'), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599749031, 1)), ('signature', OrderedDict([('hash', b'6\x87\xd5Y\xa7\xc7\xcf$\xab\x1e\xa2{\xe5B\xe5\x99\xdbl\x8d\xf4'), ('keyId', 6870854312365391875)]))])), ('$client', OrderedDict([('application', OrderedDict([('name', 'MongoDB Shell')])), ('driver', OrderedDict([('name', 'MongoDB Internal Client'), ('version', '3.6.3')])), ('os', OrderedDict([('type', 'Linux'), ('name', 'Ubuntu'), ('architecture', 'x86_64'), ('version', '18.04')])), ('mongos', OrderedDict([('host', 'maxs-comp:27103'), ('client', '127.0.0.1:52148'), ('version', '3.6.3')]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599749031, 1)), ('t', 1)]))]))])

        respondersCollection.responders += op_msg_responders
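
A hypothetical re-creation of the responder-matching idea, for illustration only (not MindsDB's RespondersCollection implementation): a 'when' pattern matches a message if every key is present and its value either equals the pattern value or, when the pattern value is callable, the predicate returns True.

def matches(when, msg):
    for key, expected in when.items():
        if key not in msg:
            return False
        if callable(expected):
            if not expected(msg[key]):
                return False
        elif msg[key] != expected:
            return False
    return True

responders = [
    ({'drop': 'system.sessions'}, {'ok': 1}),
    ({'serverStatus': lambda v: bool(v)}, {'ok': 1}),
]

def respond(msg):
    # first matching responder wins
    for when, result in responders:
        if matches(when, msg):
            return result
    return {'ok': 0}

print(respond({'serverStatus': 1, '$db': 'admin'}))  # {'ok': 1}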
Example 10
def run_environment(config,
                    apis=['mysql'],
                    override_integration_config={},
                    override_api_config={},
                    mindsdb_database='mindsdb',
                    clear_storage=True):
    temp_config_path = prepare_config(config, mindsdb_database,
                                      override_integration_config,
                                      override_api_config, clear_storage)
    config = Config(temp_config_path)

    api_str = ','.join(apis)
    sp = subprocess.Popen([
        'python3', '-m', 'mindsdb', '--api', api_str, '--config',
        temp_config_path, '--verbose'
    ],
                          close_fds=True,
                          stdout=OUTPUT,
                          stderr=OUTPUT)
    atexit.register(stop_mindsdb, sp=sp)

    async def wait_port_async(port, timeout):
        start_time = time.time()
        started = is_port_in_use(port)
        while (time.time() - start_time) < timeout and started is False:
            await asyncio.sleep(1)
            started = is_port_in_use(port)
        return started

    async def wait_apis_start(ports):
        futures = [wait_port_async(port, 60) for port in ports]
        success = True
        for future in asyncio.as_completed(futures):
            success = success and await future
        return success

    ports_to_wait = [config['api'][api]['port'] for api in apis]

    ioloop = asyncio.get_event_loop()
    if ioloop.is_closed():
        ioloop = asyncio.new_event_loop()
    success = ioloop.run_until_complete(wait_apis_start(ports_to_wait))
    ioloop.close()
    if not success:
        raise Exception("Can't start MindsDB APIs")

    CONFIG.MINDSDB_STORAGE_PATH = config.paths['predictors']
    mdb = NativeInterface(config)
    datastore = DataStore(config)

    return mdb, datastore
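
The async port-waiting helpers rely on an is_port_in_use check that isn't shown. A self-contained sketch of one common way to implement the whole wait, using asyncio.gather instead of as_completed (the port numbers are placeholders):

import asyncio
import socket
import time

def is_port_in_use(port):
    # a successful connect means something is listening on the port
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(('127.0.0.1', port)) == 0

async def wait_port_async(port, timeout):
    start_time = time.time()
    while (time.time() - start_time) < timeout:
        if is_port_in_use(port):
            return True
        await asyncio.sleep(1)
    return is_port_in_use(port)

async def wait_apis_start(ports):
    results = await asyncio.gather(*(wait_port_async(p, 60) for p in ports))
    return all(results)

# success = asyncio.run(wait_apis_start([47334, 47335]))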
Example 11
def run_environment(db, config, run_apis='mysql'):
    DEFAULT_DB = f'default_{db}'

    temp_config_path = prepare_config(config, DEFAULT_DB)

    if db in ['mssql', 'mongodb']:
        db_ready = True
    else:
        if is_container_run(f'{db}-test') is False:
            run_container(db)
        db_ready = wait_db(config, DEFAULT_DB)

    if isinstance(run_apis, list) is False:
        run_apis = run_apis.split(',')
    api_str = ','.join(run_apis)

    if db_ready:
        sp = subprocess.Popen([
            'python3', '-m', 'mindsdb', '--api', api_str, '--config',
            temp_config_path
        ],
                              stdout=OUTPUT,
                              stderr=OUTPUT)
        atexit.register(stop_mindsdb, sp=sp)

    api_ready = True
    for api in run_apis:
        apistr = 'mongodb' if api == 'mongodb' else api
        api_ready = api_ready and wait_api_ready(config, apistr)
        if api_ready is False:
            break

    if db_ready is False or api_ready is False:
        print(
            f'Failed by timeout. {db} started={db_ready}, MindsDB started={api_ready}'
        )
        raise Exception('Failed by timeout')

    CONFIG.MINDSDB_STORAGE_PATH = config.paths['predictors']
    mdb = MindsdbNative(config)
    datastore = DataStore(config)

    return mdb, datastore
Example 12
    def update_model(self, name):
        from mindsdb_native import F
        from mindsdb_worker.updater.update_model import update_model
        from mindsdb.interfaces.storage.db import session, Predictor
        from mindsdb.interfaces.datastore.datastore import DataStore

        try:
            predictor_record = Predictor.query.filter_by(
                company_id=self.company_id, name=name,
                is_custom=False).first()
            predictor_record.update_status = 'updating'
            session.commit()
            update_model(name, self.delete_model, F.delete_model, self.learn,
                         self._lock_context, self.company_id,
                         self.config['paths']['predictors'], predictor_record,
                         self.fs_store, DataStore())

            predictor_record = self._update_db_status(predictor_record)
        except Exception as e:
            log.error(e)
            predictor_record.update_status = 'update_failed'
            session.commit()
            return str(e)
Example 13
    def __init__(self, config, integration_name):
        self.config = config
        self.integration_name = integration_name
        self.default_store = DataStore()
Example 14
    def __init__(self, config):
        self.config = config
        self.datastore = DataStore(config)
Example 15
def initialize_interfaces(app):
    app.original_data_store = DataStore()
    app.original_model_interface = ModelInterface()
    config = Config()
    app.config_obj = config
Example 16
class IntegrationDataNode(DataNode):
    type = 'integration'

    def __init__(self, config, integration_name):
        self.config = config
        self.integration_name = integration_name
        self.default_store = DataStore()

    def getType(self):
        return self.type

    def getTables(self):
        return []

    def hasTable(self, tableName):
        return True

    def getTableColumns(self, tableName):
        return []

    def select(self,
               table=None,
               columns=None,
               where=None,
               where_data=None,
               order_by=None,
               group_by=None,
               came_from=None):
        has_where = isinstance(where, (dict, list)) and len(where) > 0

        if isinstance(where, dict):
            where = [where]

        if isinstance(where, list):
            for el in where:
                if isinstance(el, dict):
                    for key in el:
                        if (isinstance(el[key], list) and len(el[key]) > 0
                                and isinstance(el[key][0], str)
                                and '.' in el[key][0]):
                            # strip the table prefix: 'tbl.col' -> 'col'
                            el[key][0] = el[key][0][el[key][0].find('.') + 1:]
            where = {'and': where}

        format_data = {'from': table, 'select': columns}
        if has_where:
            format_data['where'] = where

        # 'format' is assumed to be a SQL renderer, not the str builtin
        query = format(format_data)

        ds, ds_name = self.default_store.save_datasource(
            f'temp_ds_{int(time.time()*100)}', self.integration_name,
            {'query': query})
        dso = self.default_store.get_datasource_obj(ds_name)

        data = dso.df.to_dict(orient='records')

        for column_name in dso.df.columns:
            if pd.core.dtypes.common.is_datetime_or_timedelta_dtype(
                    dso.df[column_name]):
                pass_data = dso.df[column_name].dt.to_pydatetime()
                for i, rec in enumerate(data):
                    rec[column_name] = pass_data[i].timestamp()

        self.default_store.delete_datasource(ds_name)

        return data
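
The datetime handling at the end converts pandas datetime columns to epoch seconds before returning rows. An isolated sketch on a toy DataFrame (requires pandas), using the public pandas dtype check rather than the private pd.core helper in the snippet:

import pandas as pd

df = pd.DataFrame({'ts': pd.to_datetime(['2021-01-01', '2021-06-01']),
                   'value': [1, 2]})
data = df.to_dict(orient='records')

for column_name in df.columns:
    if pd.api.types.is_datetime64_any_dtype(df[column_name]):
        pass_data = df[column_name].dt.to_pydatetime()
        for i, rec in enumerate(data):
            # naive datetimes are interpreted in the local timezone
            rec[column_name] = pass_data[i].timestamp()

print(data)  # e.g. [{'ts': 1609459200.0, 'value': 1}, ...]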
Example 17
class MindsDBDataNode(DataNode):
    type = 'mindsdb'

    def __init__(self, config):
        self.config = Config()
        self.mindsdb_native = NativeInterface()
        self.custom_models = CustomModels()
        self.ai_table = AITable_store()
        self.default_store = DataStore()

    def getTables(self):
        models = self.mindsdb_native.get_models()
        models = [x['name'] for x in models if x['status'] == 'complete']
        models += ['predictors', 'commands']
        models += [x['name'] for x in self.custom_models.get_models()]

        ai_tables = self.ai_table.get_ai_tables()
        models += [x['name'] for x in ai_tables]
        return models

    def hasTable(self, table):
        return table in self.getTables()

    def _get_ai_table_columns(self, table_name):
        aitable_record = self.ai_table.get_ai_table(table_name)
        columns = ([x['name'] for x in aitable_record.query_fields] +
                   [x['name'] for x in aitable_record.predictor_columns])
        return columns

    def _get_model_columns(self, table_name):
        model = self.mindsdb_native.get_model_data(name=table_name)
        columns = []
        columns += model['columns']
        columns += [f'{x}_original' for x in model['predict']]
        for col in model['predict']:
            if model['data_analysis_v2'][col]['typing']['data_type'] == 'Numeric':
                columns += [f"{col}_min", f"{col}_max"]
            columns += [f"{col}_confidence"]
            columns += [f"{col}_explain"]
        return columns

    def getTableColumns(self, table):
        try:
            columns = self.custom_models.get_model_data(table)['columns']
            columns += [
                'external_datasource', 'select_data_query', 'when_data'
            ]
            return columns
        except Exception:
            pass

        if table == 'predictors':
            return [
                'name', 'status', 'accuracy', 'predict', 'select_data_query',
                'external_datasource', 'training_options'
            ]
        if table == 'commands':
            return ['command']

        columns = []

        ai_tables = self.ai_table.get_ai_table(table)
        if ai_tables is not None:
            columns = self._get_ai_table_columns(table)
        elif table in [x['name'] for x in self.mindsdb_native.get_models()]:
            columns = self._get_model_columns(table)
            columns += [
                'when_data', 'select_data_query', 'external_datasource'
            ]

        return columns

    def _select_predictors(self):
        models = self.mindsdb_native.get_models()
        # TODO add custom models
        return [
            {
                'name': x['name'],
                'status': x['status'],
                'accuracy':
                str(x['accuracy']) if x['accuracy'] is not None else None,
                'predict': ', '.join(x['predict']),
                'select_data_query': '',
                'external_datasource': '',  # TODO
                'training_options': ''  # TODO ?
            } for x in models
        ]

    def delete_predictor(self, name):
        self.mindsdb_native.delete_model(name)

    def _select_from_ai_table(self, table, columns, where):
        aitable_record = self.ai_table.get_ai_table(table)
        integration = aitable_record.integration_name
        query = aitable_record.integration_query
        predictor_name = aitable_record.predictor_name

        ds, ds_name = self.default_store.save_datasource(
            'temp_ds', integration, {'query': query})
        dso = self.default_store.get_datasource_obj(ds_name)
        res = self.mindsdb_native.predict(name=predictor_name, when_data=dso)
        self.default_store.delete_datasource(ds_name)

        keys_map = {}
        for f in aitable_record.predictor_columns:
            keys_map[f['value']] = f['name']
        for f in aitable_record.query_fields:
            keys_map[f['name']] = f['name']
        keys = list(keys_map.keys())

        data = []
        for i, el in enumerate(res):
            data.append({keys_map[key]: el[key] for key in keys})

        return data

    def select(self,
               table,
               columns=None,
               where=None,
               where_data=None,
               order_by=None,
               group_by=None,
               came_from=None):
        ''' NOTE: WHERE conditions can only be $eq comparisons joined with 'and'
        '''
        if table == 'predictors':
            return self._select_predictors()
        if table == 'commands':
            return []
        if self.ai_table.get_ai_table(table):
            return self._select_from_ai_table(table, columns, where)

        original_when_data = None
        if 'when_data' in where:
            if len(where) > 1:
                raise ValueError(
                    "No other keys may be used in 'where' when 'when_data' is present"
                )
            try:
                original_when_data = where['when_data']['$eq']
                where_data = json.loads(where['when_data']['$eq'])
                if isinstance(where_data, list) is False:
                    where_data = [where_data]
            except Exception:
                raise ValueError(
                    f'''Error while parsing 'when_data'="{where_data}"''')

        external_datasource = None
        if 'external_datasource' in where:
            external_datasource = where['external_datasource']['$eq']
            del where['external_datasource']

        select_data_query = None
        if came_from is not None and 'select_data_query' in where:
            select_data_query = where['select_data_query']['$eq']
            del where['select_data_query']

            dbtype = self.config['integrations'][came_from]['type']
            if dbtype == 'clickhouse':
                ch = Clickhouse(self.config, came_from)
                res = ch._query(
                    select_data_query.strip(' ;\n') + ' FORMAT JSON')
                data = res.json()['data']
            elif dbtype == 'mariadb':
                maria = Mariadb(self.config, came_from)
                data = maria._query(select_data_query)
            elif dbtype == 'mysql':
                mysql = MySQL(self.config, came_from)
                data = mysql._query(select_data_query)
            elif dbtype == 'postgres':
                postgres = PostgreSQL(self.config, came_from)
                data = postgres._query(select_data_query)
            elif dbtype == 'mssql':
                mssql = MSSQL(self.config, came_from)
                data = mssql._query(select_data_query, fetch=True)
            else:
                raise Exception(f'Unknown database type: {dbtype}')

            if where_data is None:
                where_data = data
            else:
                where_data += data

        new_where = {}
        if where_data is not None:
            where_data = pandas.DataFrame(where_data)
        else:
            for key, value in where.items():
                if (isinstance(value, dict) is False or len(value.keys()) != 1
                        or list(value.keys())[0] != '$eq'):
                    # TODO value should be just string or number
                    raise Exception(f"unsupported 'where' condition for '{key}'")
                new_where[key] = value['$eq']

            if len(new_where) == 0:
                return []

            where_data = [new_where]

        try:
            model = self.custom_models.get_model_data(name=table)
        except Exception:
            model = self.mindsdb_native.get_model_data(name=table)

        predicted_columns = model['predict']

        original_target_values = {}
        for col in predicted_columns:
            if where_data is not None:
                if col in where_data:
                    original_target_values[col + '_original'] = list(where_data[col])
                else:
                    original_target_values[col + '_original'] = [None] * len(where_data)
            else:
                original_target_values[col + '_original'] = [None]

        if table in [x['name'] for x in self.custom_models.get_models()]:
            res = self.custom_models.predict(name=table, when_data=where_data)

            data = []
            fields = model['columns']
            for i, ele in enumerate(res):
                row = {}
                row['select_data_query'] = select_data_query
                row['external_datasource'] = external_datasource
                row['when_data'] = original_when_data

                for key in ele:
                    row[key] = ele[key]['predicted_value']
                    # FIXME prefer get int from mindsdb_native in this case
                    if model['data_analysis_v2'][key]['typing']['data_subtype'] == 'Int':
                        row[key] = int(row[key])

                for k in fields:
                    if k not in ele:
                        if isinstance(where_data, list):
                            if k in where_data[i]:
                                row[k] = where_data[i][k]
                            else:
                                row[k] = None
                        elif k in where_data.columns:
                            row[k] = where_data[k].iloc[i]
                        else:
                            row[k] = None

                for k in original_target_values:
                    row[k] = original_target_values[k][i]

                data.append(row)

            field_types = {
                f: model['data_analysis_v2'][f]['typing']['data_subtype']
                for f in fields if 'typing' in model['data_analysis_v2'][f]
            }
            for row in data:
                cast_row_types(row, field_types)

            return data
        else:
            res = self.mindsdb_native.predict(name=table, when_data=where_data)

            keys = [x for x in list(res._data.keys()) if x in columns]
            min_max_keys = []
            for col in predicted_columns:
                if model['data_analysis_v2'][col]['typing']['data_type'] == 'Numeric':
                    min_max_keys.append(col)

            data = []
            explains = []
            for i, el in enumerate(res):
                data.append({key: el[key] for key in keys})
                explains.append(el.explain())

            field_types = {
                f: model['data_analysis_v2'][f]['typing']['data_subtype']
                for f in model['columns']
                if 'typing' in model['data_analysis_v2'][f]
            }

            for i, row in enumerate(data):
                cast_row_types(row, field_types)

                row['select_data_query'] = select_data_query
                row['external_datasource'] = external_datasource
                row['when_data'] = original_when_data

                for k in original_target_values:
                    row[k] = original_target_values[k][i]

                explanation = explains[i]
                for key in predicted_columns:
                    row[key + '_confidence'] = explanation[key]['confidence']
                    row[key + '_explain'] = json.dumps(explanation[key],
                                                       cls=NumpyJSONEncoder,
                                                       ensure_ascii=False)
                for key in min_max_keys:
                    row[key + '_min'] = min(
                        explanation[key]['confidence_interval'])
                    row[key + '_max'] = max(
                        explanation[key]['confidence_interval'])

            return data
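
Outside of the special keys, select() accepts only equality conditions. A standalone version of that '$eq' flattening, for illustration:

# Each value must be exactly {'$eq': <scalar>}.
def flatten_eq_where(where):
    new_where = {}
    for key, value in where.items():
        if not isinstance(value, dict) or list(value.keys()) != ['$eq']:
            raise Exception(f"unsupported 'where' condition for '{key}'")
        new_where[key] = value['$eq']
    return new_where

print(flatten_eq_where({'sqft': {'$eq': 1200}, 'beds': {'$eq': 3}}))
# {'sqft': 1200, 'beds': 3}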
Example 18
    def __init__(self, config):
        self.config = Config()
        self.mindsdb_native = NativeInterface()
        self.custom_models = CustomModels()
        self.ai_table = AITable_store()
        self.default_store = DataStore()
Example 19
def initialize_interfaces(config, app):
    app.default_store = DataStore(config)
    app.mindsdb_native = NativeInterface(config)
    app.custom_models = CustomModels(config)
    app.dbw = DatabaseWrapper(config)
    app.config_obj = config
Example 20
def initialize_interfaces(config, app):
    app.default_store = DataStore(config)
    app.mindsdb_native = MindsdbNative(config)
    app.config_obj = config
Example 21
class StreamController:
    def __init__(self,
                 name,
                 predictor,
                 stream_in,
                 stream_out,
                 anomaly_stream=None,
                 learning_stream=None,
                 learning_threshold=100):
        self.name = name
        self.predictor = predictor

        self.stream_in = stream_in
        self.stream_out = stream_out
        self.anomaly_stream = anomaly_stream

        self.learning_stream = learning_stream
        self.learning_threshold = learning_threshold
        self.learning_data = []

        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.stop_event = Event()
        self.model_interface = ModelInterfaceWrapper(ModelInterface())
        self.data_store = DataStore()
        self.config = Config()

        p = db.session.query(db.Predictor).filter_by(
            company_id=self.company_id, name=self.predictor).first()
        if p is None:
            raise Exception(f'Predictor {predictor} doesn\'t exist')

        self.target = p.to_predict[0]

        ts_settings = p.learn_args.get('timeseries_settings', None)
        if ts_settings is not None and not ts_settings.get('is_timeseries'):
            ts_settings = None

        if ts_settings is None:
            self.thread = Thread(target=StreamController._make_predictions,
                                 args=(self, ))
        else:
            self.ts_settings = ts_settings
            self.thread = Thread(target=StreamController._make_ts_predictions,
                                 args=(self, ))

        self.thread.start()

    def _is_anomaly(self, res):
        for k in res:
            if k.endswith('_anomaly') and res[k] is not None:
                return True
        return False

    def _consider_learning(self):
        if self.learning_stream is not None:
            self.learning_data.extend(self.learning_stream.read())
            if len(self.learning_data) >= self.learning_threshold:
                p = db.session.query(db.Predictor).filter_by(
                    company_id=self.company_id, name=self.predictor).first()
                ds_record = db.session.query(
                    db.Datasource).filter_by(id=p.datasource_id).first()

                df = pd.DataFrame.from_records(self.learning_data)
                name = 'name_' + str(time()).replace('.', '_')
                path = os.path.join(self.config['paths']['datasources'], name)
                df.to_csv(path)

                from_data = {
                    'class': 'FileDS',
                    'args': [path],
                    'kwargs': {},
                }

                self.data_store.save_datasource(name=name,
                                                source_type='file',
                                                source=path,
                                                file_path=path,
                                                company_id=self.company_id)
                ds = self.data_store.get_datasource(name, self.company_id)

                self.model_interface.adjust(p.name, from_data, ds['id'],
                                            self.company_id)
                self.learning_data.clear()

    def _make_predictions(self):
        while not self.stop_event.wait(0.5):
            self._consider_learning()
            for when_data in self.stream_in.read():
                preds = self.model_interface.predict(self.predictor, when_data,
                                                     'dict')
                for res in preds:
                    if self.anomaly_stream is not None and self._is_anomaly(
                            res):
                        self.anomaly_stream.write(res)
                    else:
                        self.stream_out.write(res)

    def _make_ts_predictions(self):
        window = self.ts_settings['window']

        order_by = self.ts_settings['order_by']
        order_by = [order_by] if isinstance(order_by, str) else order_by

        group_by = self.ts_settings.get('group_by', None)
        group_by = [group_by] if isinstance(group_by, str) else group_by

        cache = Cache(self.name)

        while not self.stop_event.wait(0.5):
            self._consider_learning()
            for when_data in self.stream_in.read():
                for ob in order_by:
                    if ob not in when_data:
                        raise Exception(
                            f'when_data doesn\'t contain order_by[{ob}]')

                for gb in group_by:
                    if gb not in when_data:
                        raise Exception(
                            f'when_data doesn\'t contain group_by[{gb}]')

                gb_value = tuple(
                    when_data[gb]
                    for gb in group_by) if group_by is not None else ''

                # because cache doesn't work for tuples
                # (raises Exception: tuple doesn't have "encode" attribute)
                gb_value = str(gb_value)

                with cache:
                    if gb_value not in cache:
                        cache[gb_value] = []

                    # do this because shelve-cache doesn't support
                    # in-place changing
                    records = cache[gb_value]
                    records.append(when_data)
                    cache[gb_value] = records

            with cache:
                for gb_value in cache.keys():
                    if len(cache[gb_value]) >= window:
                        cache[gb_value] = [
                            *sorted(
                                cache[gb_value],
                                # WARNING: assuming wd[ob] is numeric
                                key=lambda wd: tuple(wd[ob]
                                                     for ob in order_by))
                        ]

                        while len(cache[gb_value]) >= window:
                            res_list = self.model_interface.predict(
                                self.predictor, cache[gb_value][:window],
                                'dict')
                            if self.anomaly_stream is not None and self._is_anomaly(
                                    res_list[-1]):
                                self.anomaly_stream.write(res_list[-1])
                            else:
                                self.stream_out.write(res_list[-1])
                            cache[gb_value] = cache[gb_value][1:]
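
A pure-Python sketch of the grouped sliding-window logic in _make_ts_predictions, reduced to a batch generator (no streams, no shelve cache): records are bucketed by group key, sorted by the order-by field, and emitted one window at a time once enough rows have arrived.

def windows(records, group_by, order_by, window):
    cache = {}
    for rec in records:
        cache.setdefault(rec[group_by], []).append(rec)
    for key, rows in cache.items():
        rows.sort(key=lambda r: r[order_by])
        while len(rows) >= window:
            yield key, rows[:window]
            rows = rows[1:]  # slide the window forward by one record

recs = [{'g': 'a', 't': 2}, {'g': 'a', 't': 1}, {'g': 'a', 't': 3}]
for key, win in windows(recs, 'g', 't', window=2):
    print(key, win)
# a [{'g': 'a', 't': 1}, {'g': 'a', 't': 2}]
# a [{'g': 'a', 't': 2}, {'g': 'a', 't': 3}]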