Example #1
def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = session.query(db.Predictor).filter_by(id=predictor_id).first()
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()
        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = ModelInterfaceWrapper(ModelInterface(), predictor_record.company_id)
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {'error': f'{traceback.format_exc()}\nMain error: {e}'}
        session.commit()
        raise e
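
A minimal usage sketch for run_fit, assuming it and its session/FsStore dependencies are importable; the predictor id and CSV path below are placeholders, not part of the original example:

import pandas as pd

# Hypothetical caller: train an existing predictor record on a DataFrame.
# run_fit persists progress and results on the record, stores the full
# traceback in predictor_record.data on failure, and re-raises.
df = pd.read_csv('training_data.csv')  # placeholder data source
run_fit(predictor_id=1, df=df)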
Example #2

    def __init__(self,
                 original_model_interface,
                 original_data_store,
                 company_id=None) -> None:
        """
        Initialize the session.

        :param original_model_interface: model interface to wrap with company scoping
        :param original_data_store: data store to wrap with company scoping
        :param company_id: id of the company this session belongs to
        """

        self.username = None
        self.auth = False
        self.company_id = company_id
        self.logging = log

        self.integration = None
        self.integration_type = None
        self.database = None

        self.config = Config()
        self.ai_table = AITableStore(company_id=company_id)
        self.data_store = DataStoreWrapper(data_store=original_data_store,
                                           company_id=company_id)
        self.model_interface = ModelInterfaceWrapper(
            model_interface=original_model_interface, company_id=company_id)

        self.datahub = init_datahub(model_interface=self.model_interface,
                                    ai_table=self.ai_table,
                                    data_store=self.data_store,
                                    company_id=company_id)

        self.prepared_stmts = {}
        self.packet_sequence_number = 0
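
A minimal sketch of constructing this controller; the enclosing class name (SessionController below) is a guess, since the example shows only its __init__, and the ModelInterface/DataStore constructors are borrowed from the other examples on this page:

# hypothetical wiring; both objects get wrapped with company scoping
session = SessionController(original_model_interface=ModelInterface(),
                            original_data_store=DataStore(),
                            company_id=None)
print(session.company_id)  # None: no company associated yet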
Example #3

    def __init__(self,
                 name,
                 predictor,
                 stream_in,
                 stream_out,
                 anomaly_stream=None,
                 learning_stream=None,
                 learning_threshold=100):
        self.name = name
        self.predictor = predictor

        self.stream_in = stream_in
        self.stream_out = stream_out
        self.anomaly_stream = anomaly_stream

        self.learning_stream = learning_stream
        self.learning_threshold = learning_threshold
        self.learning_data = []

        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.stop_event = Event()
        self.model_interface = ModelInterfaceWrapper(ModelInterface())
        self.data_store = DataStore()
        self.config = Config()

        p = db.session.query(db.Predictor).filter_by(
            company_id=self.company_id, name=self.predictor).first()
        if p is None:
            raise Exception(f'Predictor {predictor} doesn\'t exist')

        self.target = p.to_predict[0]

        ts_settings = p.learn_args.get('timeseries_settings', None)
        # guard: learn_args may not include timeseries settings at all
        if ts_settings is not None and not ts_settings.get('is_timeseries'):
            ts_settings = None

        if ts_settings is None:
            self.thread = Thread(target=StreamController._make_predictions,
                                 args=(self, ))
        else:
            self.ts_settings = ts_settings
            self.thread = Thread(target=StreamController._make_ts_predictions,
                                 args=(self, ))

        self.thread.start()
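
One small aside on the thread setup above: Thread(target=StreamController._make_predictions, args=(self, )) passes the unbound method plus an explicit self, which is equivalent to passing the bound method. A self-contained sketch of both spellings:

from threading import Thread

class Demo:
    def work(self):
        print('running')

d = Demo()
# the example's pattern: unbound method, explicit instance argument
Thread(target=Demo.work, args=(d, )).start()
# equivalent and more common: bound method, no args needed
Thread(target=d.work).start()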
Example #4
    def __init__(self, config):
        mongodb_config = config['api'].get('mongodb')
        assert mongodb_config is not None, 'mongodb config is missing!'
        host = mongodb_config['host']
        port = mongodb_config['port']
        log.debug(f'start mongo server on {host}:{port}')

        super().__init__((host, int(port)), MongoRequestHandler)

        self.mindsdb_env = {
            'config': config,
            'origin_data_store': DataStore(),
            'origin_model_interface': ModelInterface()
        }
        self.mindsdb_env['mindsdb_native'] = ModelInterfaceWrapper(
            model_interface=self.mindsdb_env['origin_model_interface'],
            company_id=None)
        self.mindsdb_env['data_store'] = DataStoreWrapper(
            data_store=self.mindsdb_env['origin_data_store'], company_id=None)

        respondersCollection = RespondersCollection()

        opQueryResponder = OpQueryResponder(respondersCollection)
        opMsgResponder = OpMsgResponder(respondersCollection)
        opInsertResponder = OpInsertResponder(respondersCollection)

        self.operationsHandlersMap = {
            OP_QUERY: opQueryResponder,
            OP_MSG: opMsgResponder,
            OP_INSERT: opInsertResponder
        }

        respondersCollection.add(when={'drop': 'system.sessions'},
                                 result={'ok': 1})
        respondersCollection.add(when={'update': 'system.version'},
                                 result={'ok': 1})
        respondersCollection.add(
            when={'setFeatureCompatibilityVersion': helpers.is_true},
            result={'ok': 1})
        # OpMSG=OrderedDict([('features', 1), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599748325, 1)), ('signature', OrderedDict([('hash', b'\xb8\xc3\x03\x18\xca\xe6bh\xf0\xcb47,\x924\x8a >\xfc\x91'), ('keyId', 6870854312365391875)]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599748325, 1)), ('t', 1)]))])), ('$db', 'admin')])
        respondersCollection.add(when={'features': helpers.is_true},
                                 result={'ok': 1})
        # OpMSG=OrderedDict([('serverStatus', 1), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599748366, 1)), ('signature', OrderedDict([('hash', b'\xa1E}\xbbIU\xc2D\x95++\x82\x88\xb5\x84\xf5\xda)+B'), ('keyId', 6870854312365391875)]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599748366, 1)), ('t', 1)]))])), ('$db', 'admin')])
        respondersCollection.add(when={'serverStatus': helpers.is_true},
                                 result={'ok': 1})
        # OpMSG=OrderedDict([('ismaster', 1), ('$db', 'admin'), ('$clusterTime', OrderedDict([('clusterTime', Timestamp(1599749031, 1)), ('signature', OrderedDict([('hash', b'6\x87\xd5Y\xa7\xc7\xcf$\xab\x1e\xa2{\xe5B\xe5\x99\xdbl\x8d\xf4'), ('keyId', 6870854312365391875)]))])), ('$client', OrderedDict([('application', OrderedDict([('name', 'MongoDB Shell')])), ('driver', OrderedDict([('name', 'MongoDB Internal Client'), ('version', '3.6.3')])), ('os', OrderedDict([('type', 'Linux'), ('name', 'Ubuntu'), ('architecture', 'x86_64'), ('version', '18.04')])), ('mongos', OrderedDict([('host', 'maxs-comp:27103'), ('client', '127.0.0.1:52148'), ('version', '3.6.3')]))])), ('$configServerState', OrderedDict([('opTime', OrderedDict([('ts', Timestamp(1599749031, 1)), ('t', 1)]))]))])

        respondersCollection.responders += responders
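
A minimal sketch of starting this server; the class name (MongoServer below) is a guess, since the example shows only __init__, and serve_forever/shutdown come from the socketserver base class implied by super().__init__((host, int(port)), MongoRequestHandler):

config = Config()             # must provide config['api']['mongodb']
server = MongoServer(config)  # hypothetical class name
try:
    server.serve_forever()    # socketserver API: block and handle requests
except KeyboardInterrupt:
    server.shutdown()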
Example #5
    def before_request():
        company_id = request.headers.get('company-id')

        if company_id is not None:
            try:
                company_id = int(company_id)
            except Exception as e:
                get_log('http').error(
                    f'Could not parse company id: {company_id} | exception: {e}'
                )
                company_id = None

        request.company_id = company_id

        request.default_store = DataStoreWrapper(
            data_store=current_app.original_data_store, company_id=company_id)

        request.model_interface = ModelInterfaceWrapper(
            model_interface=current_app.original_model_interface,
            company_id=company_id)
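
A minimal sketch of wiring this hook into a Flask app, assuming before_request is in scope; the attribute names mirror the example's current_app usage, and the constructors are borrowed from the other examples on this page:

from flask import Flask

app = Flask(__name__)
# the hook reads these off the app via current_app
app.original_data_store = DataStore()
app.original_model_interface = ModelInterface()

# run before every request so handlers see company-scoped wrappers
app.before_request(before_request)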
Example #6
    os.environ['DEFAULT_LOG_LEVEL'] = config['log']['level']['console']
    os.environ['LIGHTWOOD_LOG_LEVEL'] = config['log']['level']['console']

    # Switch to this once the native interface has its own thread :/
    ctx = mp.get_context('spawn')

    from mindsdb.__about__ import __version__ as mindsdb_version
    print(f'Version {mindsdb_version}')

    print(f'Configuration file:\n   {config.config_path}')
    print(f"Storage path:\n   {config['paths']['root']}")

    # @TODO Backwards compatibility for tests, remove later
    from mindsdb.interfaces.database.integrations import add_db_integration, get_db_integration, get_db_integrations, remove_db_integration
    dbw = DatabaseWrapper(COMPANY_ID)
    model_interface = ModelInterfaceWrapper(ModelInterface(), COMPANY_ID)
    raw_model_data_arr = model_interface.get_models()
    model_data_arr = []
    for model in raw_model_data_arr:
        if model['status'] == 'complete':
            # fetch model data inside the try so a failing model is skipped
            try:
                model_data_arr.append(
                    model_interface.get_model_data(model['name']))
            except Exception:
                pass

    is_cloud = config.get('cloud', False)
    if not is_cloud:
        for integration_name in get_db_integrations(COMPANY_ID,
                                                    sensitive_info=True):
Example #7

class StreamController:
    def __init__(self,
                 name,
                 predictor,
                 stream_in,
                 stream_out,
                 anomaly_stream=None,
                 learning_stream=None,
                 learning_threshold=100):
        self.name = name
        self.predictor = predictor

        self.stream_in = stream_in
        self.stream_out = stream_out
        self.anomaly_stream = anomaly_stream

        self.learning_stream = learning_stream
        self.learning_threshold = learning_threshold
        self.learning_data = []

        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.stop_event = Event()
        self.model_interface = ModelInterfaceWrapper(ModelInterface())
        self.data_store = DataStore()
        self.config = Config()

        p = db.session.query(db.Predictor).filter_by(
            company_id=self.company_id, name=self.predictor).first()
        if p is None:
            raise Exception(f'Predictor {predictor} doesn\'t exist')

        self.target = p.to_predict[0]

        ts_settings = p.learn_args.get('timeseries_settings', None)
        # guard: learn_args may not include timeseries settings at all
        if ts_settings is not None and not ts_settings.get('is_timeseries'):
            ts_settings = None

        if ts_settings is None:
            self.thread = Thread(target=StreamController._make_predictions,
                                 args=(self, ))
        else:
            self.ts_settings = ts_settings
            self.thread = Thread(target=StreamController._make_ts_predictions,
                                 args=(self, ))

        self.thread.start()

    def _is_anomaly(self, res):
        for k in res:
            if k.endswith('_anomaly') and res[k] is not None:
                return True
        return False

    def _consider_learning(self):
        if self.learning_stream is not None:
            self.learning_data.extend(self.learning_stream.read())
            if len(self.learning_data) >= self.learning_threshold:
                p = db.session.query(db.Predictor).filter_by(
                    company_id=self.company_id, name=self.predictor).first()
                ds_record = db.session.query(
                    db.Datasource).filter_by(id=p.datasource_id).first()

                df = pd.DataFrame.from_records(self.learning_data)
                name = 'name_' + str(time()).replace('.', '_')
                path = os.path.join(self.config['paths']['datasources'], name)
                df.to_csv(path)

                from_data = {
                    'class': 'FileDS',
                    'args': [path],
                    'kwargs': {},
                }

                self.data_store.save_datasource(name=name,
                                                source_type='file',
                                                source=path,
                                                file_path=path,
                                                company_id=self.company_id)
                ds = self.data_store.get_datasource(name, self.company_id)

                self.model_interface.adjust(p.name, from_data, ds['id'],
                                            self.company_id)
                self.learning_data.clear()

    def _make_predictions(self):
        while not self.stop_event.wait(0.5):
            self._consider_learning()
            for when_data in self.stream_in.read():
                preds = self.model_interface.predict(self.predictor, when_data,
                                                     'dict')
                for res in preds:
                    if self.anomaly_stream is not None and self._is_anomaly(
                            res):
                        self.anomaly_stream.write(res)
                    else:
                        self.stream_out.write(res)

    def _make_ts_predictions(self):
        window = self.ts_settings['window']

        order_by = self.ts_settings['order_by']
        order_by = [order_by] if isinstance(order_by, str) else order_by

        group_by = self.ts_settings.get('group_by', None)
        group_by = [group_by] if isinstance(group_by, str) else group_by

        cache = Cache(self.name)

        while not self.stop_event.wait(0.5):
            self._consider_learning()
            for when_data in self.stream_in.read():
                for ob in order_by:
                    if ob not in when_data:
                        raise Exception(
                            f'when_data doesn\'t contain order_by[{ob}]')

                # group_by may be None (see the tuple below), so guard the loop
                if group_by is not None:
                    for gb in group_by:
                        if gb not in when_data:
                            raise Exception(
                                f'when_data doesn\'t contain group_by[{gb}]')

                gb_value = tuple(
                    when_data[gb]
                    for gb in group_by) if group_by is not None else ''

                # because cache doesn't work for tuples
                # (raises Exception: tuple doesn't have "encode" attribute)
                gb_value = str(gb_value)

                with cache:
                    if gb_value not in cache:
                        cache[gb_value] = []

                    # do this because shelve-cache doesn't support
                    # in-place changing
                    records = cache[gb_value]
                    records.append(when_data)
                    cache[gb_value] = records

            with cache:
                for gb_value in cache.keys():
                    if len(cache[gb_value]) >= window:
                        cache[gb_value] = [
                            *sorted(
                                cache[gb_value],
                                # WARNING: assuming wd[ob] is numeric
                                key=lambda wd: tuple(wd[ob]
                                                     for ob in order_by))
                        ]

                        while len(cache[gb_value]) >= window:
                            res_list = self.model_interface.predict(
                                self.predictor, cache[gb_value][:window],
                                'dict')
                            if self.anomaly_stream is not None and self._is_anomaly(
                                    res_list[-1]):
                                self.anomaly_stream.write(res_list[-1])
                            else:
                                self.stream_out.write(res_list[-1])
                            cache[gb_value] = cache[gb_value][1:]
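
A minimal sketch of exercising StreamController with toy in-memory streams; the read()/write() protocol (read returning an iterable of dicts, write taking one dict) is inferred from how stream_in and stream_out are used above, and the predictor name is a placeholder that must already exist in db:

class ListStream:
    """Toy stream satisfying the read()/write() protocol used above."""
    def __init__(self):
        self._items = []

    def read(self):
        # drain and return everything buffered so far
        items, self._items = self._items, []
        return items

    def write(self, item):
        self._items.append(item)

stream_in, stream_out = ListStream(), ListStream()
controller = StreamController(name='demo',
                              predictor='my_predictor',  # placeholder name
                              stream_in=stream_in,
                              stream_out=stream_out)
stream_in.write({'feature_a': 1.0})
# later, shut the worker thread down cleanly
controller.stop_event.set()
controller.thread.join()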