Code Example #1
File: model_controller.py Project: szhorizon/mindsdb
 def __init__(self, ray_based):
     self.config = Config()
     self.fs_store = FsSotre()
     self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
     self.dbw = DatabaseWrapper()
     self.predictor_cache = {}
     self.ray_based = ray_based
Code Example #2
    def put(self, name):
        params = request.json.get('params')
        if not isinstance(params, dict):
            abort(400, "type of 'params' must be dict")

        is_test = params.get('test', False)
        if is_test:
            del params['test']

        integration = get_integration(name)
        if integration is not None:
            abort(400, f"Integration with name '{name}' already exists")
        try:
            if 'enabled' in params:
                params['publish'] = params['enabled']
                del params['enabled']
            ca.config_obj.add_db_integration(name, params)

            mdb = ca.mindsdb_native
            cst = ca.custom_models
            model_data_arr = get_all_models_meta_data(mdb, cst)
            dbw = DatabaseWrapper(ca.config_obj)
            dbw.register_predictors(model_data_arr)
        except Exception as e:
            print(traceback.format_exc())
            abort(500, f'Error during config update: {str(e)}')

        if is_test:
            cons = dbw.check_connections()
            ca.config_obj.remove_db_integration(name)
            return {'success': cons[name]}, 200

        return '', 200
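
The 'test' branch above temporarily adds the integration, checks its connection, removes it again, and reports the result. For orientation, here is a minimal client-side sketch of exercising that branch; the host, port, route, and payload shape are assumptions for illustration, not taken from the snippet:

import requests

# Hypothetical endpoint; adjust to wherever this resource is actually mounted.
resp = requests.put(
    'http://127.0.0.1:47334/config/integrations/my_postgres',
    json={'params': {'type': 'postgres', 'host': '127.0.0.1', 'test': True}}
)
print(resp.status_code, resp.json())  # e.g. 200 {'success': True}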
Code Example #3
def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = session.query(db.Predictor).filter_by(id=predictor_id).first()
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()
        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = ModelInterfaceWrapper(ModelInterface(), predictor_record.company_id)
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {'error': f'{traceback.format_exc()}\nMain error: {e}'}
        session.commit()
        raise e
Code Example #4
 def get(self, name):
     '''return datasource metadata'''
     dbw = DatabaseWrapper(ca.config_obj)
     for db_name, connected in dbw.check_connections().items():
         if db_name == name:
             return connected, 200
     return f'Can\'t find database integration: {name}', 400
Code Example #5
 def __init__(self, config):
     self.config = config
     self.dbw = DatabaseWrapper(self.config)
     self.storage_dir = os.path.join(config['storage_dir'], 'misc')
     os.makedirs(self.storage_dir, exist_ok=True)
     self.model_cache = {}
     self.mindsdb_native = MindsdbNative(self.config)
Code Example #6
 def __init__(self):
     self.config = Config()
     self.fs_store = FsSotre()
     self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
     self.dbw = DatabaseWrapper()
     self.storage_dir = self.config['paths']['custom_models']
     os.makedirs(self.storage_dir, exist_ok=True)
     self.model_cache = {}
     self.mindsdb_native = NativeInterface()
Code Example #7
def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = Predictor.query.with_for_update().get(predictor_id)
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
            predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()

        # Get the training time for each mixer that was tried; this is only
        # possible after training has finished.
        fit_mixers = list(predictor.runtime_log[x]
                          for x in predictor.runtime_log
                          if isinstance(x, tuple) and x[0] == "fit_mixer")
        submodel_data = predictor_record.data.get("submodel_data", [])
        # Attach each training time to the corresponding mixer's info.
        if submodel_data and fit_mixers and len(submodel_data) == len(
                fit_mixers):
            for i, tr_time in enumerate(fit_mixers):
                submodel_data[i]["training_time"] = tr_time
        predictor_record.data["submodel_data"] = submodel_data

        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = WithKWArgsWrapper(ModelInterface(),
                               company_id=predictor_record.company_id)
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {
            'error': f'{traceback.format_exc()}\nMain error: {e}'
        }
        session.commit()
        raise e

    try:
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        log.warn(e)
Code Example #8
def wait_db(config, db_name):
    m = DatabaseWrapper(config)

    start_time = time.time()

    connected = m.check_connections()[db_name]

    while not connected and (time.time() - start_time) < START_TIMEOUT:
        time.sleep(2)
        connected = m.check_connections()[db_name]

    return connected
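
wait_db polls check_connections every two seconds until the named integration reports a connection or START_TIMEOUT (a module-level constant not shown in the snippet) elapses. A hedged sketch of a call site, with invented config and integration names:

# Hypothetical usage; 'config' and 'default_mariadb' are placeholders.
if not wait_db(config, 'default_mariadb'):
    raise RuntimeError('integration did not become reachable before START_TIMEOUT')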
Code Example #9
    def delete_model(self, name, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        db_p = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        if db_p is None:
            raise Exception(f"Predictor '{name}' does not exist")
        db.session.delete(db_p)
        if db_p.datasource_id is not None:
            try:
                dataset_record = db.Datasource.query.get(db_p.datasource_id)
                if (isinstance(dataset_record.data, str) and json.loads(
                        dataset_record.data).get('source_type') != 'file'):
                    DataStore().delete_datasource(dataset_record.name,
                                                  company_id)
            except Exception:
                pass
        db.session.commit()

        DatabaseWrapper(company_id).unregister_predictor(name)

        # delete from s3
        self.fs_store.delete(f'predictor_{company_id}_{db_p.id}')

        return 0
Code Example #10
File: initialize.py Project: szhorizon/mindsdb
def initialize_interfaces(app):
    app.default_store = DataStore()
    app.naitve_interface = NativeInterface()
    app.custom_models = CustomModels()
    app.dbw = DatabaseWrapper()
    config = Config()
    app.config_obj = config
Code Example #11
    def run(self):
        '''
        Runs in a subprocess because of:
        ValueError: signal only works in main thread

        Would this work with a celery worker here?
        '''
        import mindsdb_native
        import setproctitle

        try:
            setproctitle.setproctitle('mindsdb_native_process')
        except Exception:
            pass

        config = Config()
        fs_store = FsSotre()
        company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        name, from_data, to_predict, kwargs, datasource_id = self._args

        mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

        predictor_record = Predictor.query.filter_by(company_id=company_id, name=name).first()
        predictor_record.datasource_id = datasource_id
        predictor_record.to_predict = to_predict
        predictor_record.version = mindsdb_native.__version__
        predictor_record.data = {
            'name': name,
            'status': 'training'
        }
        #predictor_record.datasource_id = ... <-- can be done once `learn` is passed a datasource name
        session.commit()

        to_predict = to_predict if isinstance(to_predict, list) else [to_predict]
        data_source = getattr(mindsdb_native, from_data['class'])(*from_data['args'], **from_data['kwargs'])

        try:
            mdb.learn(
                from_data=data_source,
                to_predict=to_predict,
                **kwargs
            )
        except Exception:
            pass

        fs_store.put(name, f'predictor_{company_id}_{predictor_record.id}', config['paths']['predictors'])

        model_data = mindsdb_native.F.get_model_data(name)

        predictor_record = Predictor.query.filter_by(company_id=company_id, name=name).first()
        predictor_record.data = model_data
        session.commit()

        DatabaseWrapper().register_predictors([model_data])
Code Example #12
 def rename_model(self, old_name, new_name, company_id: int):
     db_p = db.session.query(db.Predictor).filter_by(company_id=company_id, name=old_name).first()
     db_p.name = new_name
     db.session.commit()
     dbw = DatabaseWrapper(company_id)
     dbw.unregister_predictor(old_name)
     dbw.register_predictors([self.get_model_data(new_name, company_id)])
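
This is the general re-publish pattern seen throughout these examples: unregister the predictor under its old name, then register fresh model data. A hedged sketch of the same pattern outside rename_model; the predictor name and the model_interface object are illustrative only:

# Hypothetical re-publish after a predictor's metadata changed.
dbw = DatabaseWrapper(company_id)
dbw.unregister_predictor('home_rentals')
dbw.register_predictors([model_interface.get_model_data('home_rentals', company_id)])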
Code Example #13
def run_learn(name, from_data, to_predict, kwargs, datasource_id):
    import mindsdb_native
    import mindsdb_datasources
    import mindsdb

    create_process_mark('learn')

    config = Config()
    fs_store = FsSotre()

    company_id = os.environ.get('MINDSDB_COMPANY_ID', None)

    mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

    predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                 name=name).first()
    predictor_record.datasource_id = datasource_id
    predictor_record.to_predict = to_predict
    predictor_record.native_version = mindsdb_native.__version__
    predictor_record.mindsdb_version = mindsdb_version
    predictor_record.learn_args = {'to_predict': to_predict, 'kwargs': kwargs}
    predictor_record.data = {'name': name, 'status': 'training'}
    session.commit()

    to_predict = to_predict if isinstance(to_predict, list) else [to_predict]
    data_source = getattr(mindsdb_datasources,
                          from_data['class'])(*from_data['args'],
                                              **from_data['kwargs'])

    try:
        mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)
    except Exception as e:
        log = logging.getLogger('mindsdb.main')
        log.error(f'Predictor learn error: {e}')
        predictor_record.data = {'name': name, 'status': 'error'}
        session.commit()
        delete_process_mark('learn')
        return

    fs_store.put(name, f'predictor_{company_id}_{predictor_record.id}',
                 config['paths']['predictors'])

    model_data = mindsdb_native.F.get_model_data(name)

    predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                 name=name).first()
    predictor_record.data = model_data
    session.commit()

    DatabaseWrapper().register_predictors([model_data])
    delete_process_mark('learn')
Code Example #14
File: config.py Project: yash-1002/mindsdb
 def post(self, name):
     params = request.json.get('params')
     if not isinstance(params, dict):
         abort(400, "type of 'params' must be dict")
     integration = get_integration(name)
     if integration is None:
         abort(400, f"Nothin to modify. '{name}' not exists.")
     try:
         ca.config_obj.modify_db_integration(name, params)
         DatabaseWrapper(ca.config_obj)
     except Exception as e:
         print(traceback.format_exc())
         abort(500, f'Error during integration modification: {str(e)}')
     return '', 200
Code Example #15
File: config.py Project: yash-1002/mindsdb
 def put(self, name):
     params = request.json.get('params')
     if not isinstance(params, dict):
         abort(400, "type of 'params' must be dict")
     integration = get_integration(name)
     if integration is not None:
         abort(400, f"Integration with name '{name}' already exists")
     try:
         ca.config_obj.add_db_integration(name, params)
         DatabaseWrapper(ca.config_obj)
     except Exception as e:
         print(traceback.format_exc())
         abort(500, f'Error during config update: {str(e)}')
     return '', 200
Code Example #16
    def delete_model(self, name, company_id: int):
        original_name = name
        name = f'{company_id}@@@@@{name}'

        db_p = db.session.query(db.Predictor).filter_by(
            company_id=company_id, name=original_name).first()
        db.session.delete(db_p)
        db.session.commit()

        DatabaseWrapper(company_id).unregister_predictor(name)

        # delete from s3
        self.fs_store.delete(f'predictor_{company_id}_{db_p.id}')

        return 0
Code Example #17
    def run(self):
        '''
        Runs in a subprocess because of:
        ValueError: signal only works in main thread

        Would this work with a celery worker here?
        '''
        import mindsdb_native

        name, from_data, to_predict, kwargs, config, trx_type = self._args

        mdb = mindsdb_native.Predictor(name=name)

        if trx_type == 'learn':
            to_predict = to_predict if isinstance(to_predict,
                                                  list) else [to_predict]
            data_source = getattr(mindsdb_native,
                                  from_data['class'])(*from_data['args'],
                                                      **from_data['kwargs'])
            mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)

            stats = mindsdb_native.F.get_model_data(name)['data_analysis_v2']

            DatabaseWrapper(config).register_predictors(
                [{
                    'name': name,
                    'predict': to_predict,
                    'data_analysis': stats
                }],
                setup=False)

        if trx_type == 'predict':
            if isinstance(from_data, dict):
                when_data = from_data
            else:
                when_data = getattr(mindsdb_native,
                                    from_data['class'])(*from_data['args'],
                                                        **from_data['kwargs'])

            predictions = mdb.predict(when_data=when_data,
                                      run_confidence_variation_analysis=True,
                                      **kwargs)

            # @TODO Figure out a way to recover this since we are using `spawn` here... simple Queue or instantiating a Multiprocessing manager and registering a value in a dict using that. Or using map from a multiprocessing pool with 1x process (though using a custom process there might be its own bucket of annoying)
            return predictions
Code Example #18
File: config.py Project: vishalbelsare/mindsdb
    def post(self, name):
        params = {}
        params.update((request.json or {}).get('params', {}))
        params.update(request.form or {})

        if not isinstance(params, dict):
            abort(400, "type of 'params' must be dict")
        integration = get_db_integration(name, request.company_id)
        if integration is None:
            abort(400, f"Nothin to modify. '{name}' not exists.")
        try:
            if 'enabled' in params:
                params['publish'] = params['enabled']
                del params['enabled']
            modify_db_integration(name, params, request.company_id)
            DatabaseWrapper(request.company_id).setup_integration(name)
        except Exception as e:
            log.error(str(e))
            abort(500, f'Error during integration modification: {str(e)}')
        return '', 200
Code Example #19
    def run(self):
        '''
        Runs in a subprocess because of:
        ValueError: signal only works in main thread

        Would this work with a celery worker here?
        '''
        import mindsdb_native

        name, from_data, to_predict, kwargs, config = self._args
        mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

        to_predict = to_predict if isinstance(to_predict, list) else [to_predict]
        data_source = getattr(mindsdb_native, from_data['class'])(*from_data['args'], **from_data['kwargs'])
        mdb.learn(
            from_data=data_source,
            to_predict=to_predict,
            **kwargs
        )

        model_data = mindsdb_native.F.get_model_data(name)

        DatabaseWrapper(config).register_predictors([model_data])
Code Example #20
File: config.py Project: vishalbelsare/mindsdb
 def get(self, name):
     company_id = request.company_id
     if get_db_integration(name, company_id) is None:
         abort(404, f'Can\'t find database integration: {name}')
     connections = DatabaseWrapper(company_id).check_connections()
     return connections.get(name, False), 200
Code Example #21
class CustomModels():
    def __init__(self):
        self.config = Config()
        self.fs_store = FsSotre()
        self.company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        self.dbw = DatabaseWrapper()
        self.storage_dir = self.config['paths']['custom_models']
        os.makedirs(self.storage_dir, exist_ok=True)
        self.model_cache = {}
        self.mindsdb_native = NativeInterface()

    def _dir(self, name):
        return str(os.path.join(self.storage_dir, name))

    def _internal_load(self, name):
        self.fs_store.get(name, f'custom_model_{self.company_id}_{name}',
                          self.storage_dir)
        sys.path.insert(0, self._dir(name))
        module = __import__(name)

        try:
            model = module.Model.load(
                os.path.join(self._dir(name), 'model.pickle'))
        except Exception as e:
            model = module.Model()
            model.initialize_column_types()
            if hasattr(model, 'setup'):
                model.setup()

        self.model_cache[name] = model

        return model

    def learn(self, name, from_data, to_predict, datasource_id, kwargs={}):
        model_data = self.get_model_data(name)
        model_data['status'] = 'training'
        self.save_model_data(name, model_data)

        to_predict = to_predict if isinstance(to_predict,
                                              list) else [to_predict]

        data_source = getattr(mindsdb_datasources,
                              from_data['class'])(*from_data['args'],
                                                  **from_data['kwargs'])
        data_frame = data_source.df
        model = self._internal_load(name)
        model.to_predict = to_predict

        model_data = self.get_model_data(name)
        model_data['predict'] = model.to_predict
        self.save_model_data(name, model_data)

        data_analysis = self.mindsdb_native.analyse_dataset(
            data_source)['data_analysis_v2']

        model_data = self.get_model_data(name)
        model_data['data_analysis_v2'] = data_analysis
        self.save_model_data(name, model_data)

        model.fit(data_frame, to_predict, data_analysis, kwargs)

        model.save(os.path.join(self._dir(name), 'model.pickle'))
        self.model_cache[name] = model

        model_data = self.get_model_data(name)
        model_data['status'] = 'completed'
        model_data['columns'] = list(data_analysis.keys())
        self.save_model_data(name, model_data)
        self.fs_store.put(name, f'custom_model_{self.company_id}_{name}',
                          self.storage_dir)

        self.dbw.unregister_predictor(name)
        self.dbw.register_predictors([self.get_model_data(name)])

    def predict(self, name, when_data=None, from_data=None, kwargs=None):
        self.fs_store.get(name, f'custom_model_{self.company_id}_{name}',
                          self.storage_dir)
        if kwargs is None:
            kwargs = {}
        if from_data is not None:
            if isinstance(from_data, dict):
                data_source = getattr(mindsdb_datasources, from_data['class'])(
                    *from_data['args'], **from_data['kwargs'])
            # assume that particular instance of any DataSource class is provided
            else:
                data_source = from_data
            data_frame = data_source.df
        elif when_data is not None:
            if isinstance(when_data, dict):
                for k in when_data:
                    when_data[k] = [when_data[k]]
                data_frame = pd.DataFrame(when_data)
            else:
                data_frame = pd.DataFrame(when_data)

        model = self._internal_load(name)
        predictions = model.predict(data_frame, kwargs)

        pred_arr = []
        for i in range(len(predictions)):
            row = {}
            for col in predictions.columns:
                row[col] = {'predicted_value': predictions[col].iloc[i]}
            pred_arr.append(row)

        return pred_arr

    def get_model_data(self, name):
        predictor_record = Predictor.query.filter_by(
            company_id=self.company_id, name=name, is_custom=True).first()
        return predictor_record.data

    def save_model_data(self, name, data):
        predictor_record = Predictor.query.filter_by(
            company_id=self.company_id, name=name, is_custom=True).first()
        if predictor_record is None:
            predictor_record = Predictor(company_id=self.company_id,
                                         name=name,
                                         is_custom=True,
                                         data=data)
            session.add(predictor_record)
        else:
            predictor_record.data = data
        session.commit()

    def get_models(self):
        predictor_names = [
            x.name
            for x in Predictor.query.filter_by(company_id=self.company_id,
                                               is_custom=True)
        ]
        models = []
        for name in predictor_names:
            models.append(self.get_model_data(name))

        return models

    def delete_model(self, name):
        Predictor.query.filter_by(company_id=self.company_id,
                                  name=name,
                                  is_custom=True).delete()
        session.commit()
        shutil.rmtree(self._dir(name))
        self.dbw.unregister_predictor(name)
        self.fs_store.delete(f'custom_model_{self.company_id}_{name}')

    def rename_model(self, name, new_name):
        self.fs_store.get(name, f'custom_model_{self.company_id}_{name}',
                          self.storage_dir)

        self.dbw.unregister_predictor(name)
        shutil.move(self._dir(name), self._dir(new_name))
        shutil.move(os.path.join(self._dir(new_name), f'{name}.py'),
                    os.path.join(self._dir(new_name), f'{new_name}.py'))

        predictor_record = Predictor.query.filter_by(
            company_id=self.company_id, name=name, is_custom=True).first()
        predictor_record.name = new_name
        session.commit()

        self.dbw.register_predictors([self.get_model_data(new_name)])

        self.fs_store.put(new_name,
                          f'custom_model_{self.company_id}_{new_name}',
                          self.storage_dir)
        self.fs_store.delete(f'custom_model_{self.company_id}_{name}')

    def export_model(self, name):
        shutil.make_archive(base_name=name,
                            format='zip',
                            root_dir=self._dir(name))
        return str(self._dir(name)) + '.zip'

    def load_model(self, fpath, name, trained_status):
        shutil.unpack_archive(fpath, self._dir(name), 'zip')
        shutil.move(os.path.join(self._dir(name), 'model.py'),
                    os.path.join(self._dir(name), f'{name}.py'))
        model = self._internal_load(name)
        model.to_predict = model.to_predict if isinstance(
            model.to_predict, list) else [model.to_predict]
        self.save_model_data(
            name, {
                'name': name,
                'data_analysis_v2': model.column_type_map,
                'predict': model.to_predict,
                'status': trained_status,
                'is_custom': True,
                'columns': list(model.column_type_map.keys())
            })

        with open(os.path.join(self._dir(name), '__init__.py'), 'w') as fp:
            fp.write('')

        self.fs_store.put(name, f'custom_model_{self.company_id}_{name}',
                          self.storage_dir)

        if trained_status == 'trained':
            self.dbw.register_predictors([self.get_model_data(name)])
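
The class above implies a contract for user-supplied custom model modules. Below is a hedged skeleton inferred from the calls in _internal_load, learn, and load_model; it is not an official mindsdb template, and every method name is simply what CustomModels invokes:

import pickle

class Model:
    # Set by CustomModels before fit(); also read back after loading.
    to_predict = None
    # Used as 'data_analysis_v2' when a model archive is imported.
    column_type_map = {}

    def initialize_column_types(self):
        # Populate column_type_map for a freshly constructed model.
        pass

    def setup(self):
        # Optional; called only if present (see _internal_load above).
        pass

    def fit(self, data_frame, to_predict, data_analysis, kwargs):
        # Train on the pandas DataFrame.
        pass

    def predict(self, data_frame, kwargs):
        # Must return a DataFrame-like object with .columns and .iloc.
        raise NotImplementedError

    def save(self, path):
        with open(path, 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def load(path):
        with open(path, 'rb') as f:
            return pickle.load(f)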
Code Example #22
def initialize_interfaces(config, app):
    app.default_store = DataStore(config)
    app.mindsdb_native = NativeInterface(config)
    app.custom_models = CustomModels(config)
    app.dbw = DatabaseWrapper(config)
    app.config_obj = config
Code Example #23
File: native.py Project: Mechachleopteryx/mindsdb
 def __init__(self, config):
     self.config = config
     self.dbw = DatabaseWrapper(self.config)
     self.predictor_cache = {}
Code Example #24
File: config.py Project: vishalbelsare/mindsdb
    def put(self, name):
        params = {}
        params.update((request.json or {}).get('params', {}))
        params.update(request.form or {})

        if len(params) == 0:
            abort(400, "'params' must be a non-empty dict")

        # params from FormData will be as text
        for key in ('publish', 'test', 'enabled'):
            if key in params:
                if isinstance(params[key],
                              str) and params[key].lower() in ('false', '0'):
                    params[key] = False
                else:
                    params[key] = bool(params[key])

        files = request.files
        temp_dir = None
        if files is not None:
            temp_dir = tempfile.mkdtemp(prefix='integration_files_')
            for key, file in files.items():
                temp_dir_path = Path(temp_dir)
                file_name = Path(file.filename)
                file_path = temp_dir_path.joinpath(file_name).resolve()
                if temp_dir_path not in file_path.parents:
                    raise Exception(f'Can not save file at path: {file_path}')
                file.save(file_path)
                params[key] = file_path

        is_test = params.get('test', False)
        if is_test:
            del params['test']
            db_type = params.get('type')
            checker_class = CHECKERS.get(db_type, None)
            if checker_class is None:
                abort(400, f"Unknown integration type: {db_type}")
            checker = checker_class(**params)
            if temp_dir is not None:
                shutil.rmtree(temp_dir)
            return {'success': checker.check_connection()}, 200

        integration = get_db_integration(name, request.company_id, False)
        if integration is not None:
            abort(400, f"Integration with name '{name}' already exists")

        try:
            if 'enabled' in params:
                params['publish'] = params['enabled']
                del params['enabled']
            add_db_integration(name, params, request.company_id)

            if is_test is False and params.get('publish', False) is True:
                model_data_arr = []
                for model in request.model_interface.get_models():
                    if model['status'] == 'complete':
                        try:
                            model_data_arr.append(
                                request.model_interface.get_model_data(
                                    model['name']))
                        except Exception:
                            pass
                DatabaseWrapper(request.company_id).setup_integration(name)
                DatabaseWrapper(request.company_id).register_predictors(
                    model_data_arr, name)
        except Exception as e:
            log.error(str(e))
            if temp_dir is not None:
                shutil.rmtree(temp_dir)
            abort(500, f'Error during config update: {str(e)}')

        if temp_dir is not None:
            shutil.rmtree(temp_dir)
        return '', 200
Code Example #25
 def __init__(self, config):
     self.config = config
     self.dbw = DatabaseWrapper(self.config)
Code Example #26
    mdb = MindsdbNative(config)
    cst = CustomModels(config)
    # @TODO Maybe just use `get_model_data` directly here? Seems like a useless abstraction
    model_data_arr = [{
        'name': x['name'],
        'predict': x['predict'],
        'data_analysis': mdb.get_model_data(x['name'])['data_analysis_v2']
    } for x in mdb.get_models()]

    model_data_arr.extend(cst.get_models())

    dbw = DatabaseWrapper(config)
    dbw.register_predictors(model_data_arr)

    for broken_name in [
            name for name, connected in dbw.check_connections().items()
            if connected is False
    ]:
        print(
            f'Error: failed to integrate with database aliased: {broken_name}')

    ctx = mp.get_context('spawn')

    for api_name, api_data in apis.items():
        print(f'{api_name} API: starting...')
        try:
            p = ctx.Process(target=start_functions[api_name],
                            args=(config_path, args.verbose))
Code Example #27
    start_functions = {
        'http': start_http,
        'mysql': start_mysql,
        'mongodb': start_mongo
    }

    archive_obsolete_predictors(config, '2.11.0')

    mdb = MindsdbNative(config)
    cst = CustomModels(config)

    remove_corrupted_predictors(config, mdb)

    model_data_arr = get_all_models_meta_data(mdb, cst)

    dbw = DatabaseWrapper(config)
    dbw.register_predictors(model_data_arr)

    for broken_name in [name for name, connected in dbw.check_connections().items() if connected is False]:
        log.error(f'Failed to integrate with database aliased: {broken_name}')

    ctx = mp.get_context('spawn')

    for api_name, api_data in apis.items():
        print(f'{api_name} API: starting...')
        try:
            p = ctx.Process(target=start_functions[api_name], args=(config_path, args.verbose))
            p.start()
            api_data['process'] = p
        except Exception as e:
            close_api_gracefully(apis)
Code Example #28
File: native.py Project: Mechachleopteryx/mindsdb
class NativeInterface():
    def __init__(self, config):
        self.config = config
        self.dbw = DatabaseWrapper(self.config)
        self.predictor_cache = {}

    def _invalidate_cached_predictors(self):
        # @TODO: The cache will become stale if this NativeInterface is not invoked while a bunch of predictors remain cached, no matter where we invoke it. In practice this shouldn't be a big issue though
        for predictor_name in list(self.predictor_cache.keys()):
            if (datetime.datetime.now() -
                    self.predictor_cache[predictor_name]['created']
                ).total_seconds() > 1200:
                del self.predictor_cache[predictor_name]

    def _setup_for_creation(self, name):
        if name in self.predictor_cache:
            del self.predictor_cache[name]
        # Invoked here somewhat arbitrarily; it just needs to run occasionally, not too often
        self._invalidate_cached_predictors()

        predictor_dir = Path(self.config.paths['predictors']).joinpath(name)
        create_directory(predictor_dir)
        versions_file_path = predictor_dir.joinpath('versions.json')
        with open(str(versions_file_path), 'wt') as f:
            json.dump(self.config.versions, f, indent=4, sort_keys=True)

    def create(self, name):
        self._setup_for_creation(name)
        predictor = mindsdb_native.Predictor(name=name,
                                             run_env={'trigger': 'mindsdb'})
        return predictor

    def learn(self, name, from_data, to_predict, kwargs={}):
        join_learn_process = kwargs.get('join_learn_process', False)
        if 'join_learn_process' in kwargs:
            del kwargs['join_learn_process']

        self._setup_for_creation(name)

        p = LearnProcess(name, from_data, to_predict, kwargs,
                         self.config.get_all())
        p.start()
        if join_learn_process is True:
            p.join()
            if p.exitcode != 0:
                raise Exception('Learning process failed !')

    def predict(self, name, when_data=None, kwargs={}):
        if name not in self.predictor_cache:
            # Clear the cache entirely if we have less than 1.2 GB left
            if psutil.virtual_memory().available < 1.2 * pow(10, 9):
                self.predictor_cache = {}

            if F.get_model_data(name)['status'] == 'complete':
                self.predictor_cache[name] = {
                    'predictor':
                    mindsdb_native.Predictor(name=name,
                                             run_env={'trigger': 'mindsdb'}),
                    'created':
                    datetime.datetime.now()
                }

        predictions = self.predictor_cache[name]['predictor'].predict(
            when_data=when_data, **kwargs)

        return predictions

    def analyse_dataset(self, ds):
        return F.analyse_dataset(ds)

    def get_model_data(self, name, db_fix=True):
        model = F.get_model_data(name)

        # Make some corrections for databases not to break when dealing with empty columns
        if db_fix:
            data_analysis = model['data_analysis_v2']
            for column in data_analysis['columns']:
                analysis = data_analysis.get(column)
                if isinstance(analysis,
                              dict) and (len(analysis) == 0 or analysis.get(
                                  'empty', {}).get('is_empty', False)):
                    data_analysis[column]['typing'] = {
                        'data_subtype': DATA_SUBTYPES.INT
                    }

        return model

    def get_models(self):
        models = []
        predictors = [
            x for x in Path(self.config.paths['predictors']).iterdir()
            if x.is_dir() and x.joinpath('light_model_metadata.pickle').
            is_file() and x.joinpath('heavy_model_metadata.pickle').is_file()
        ]
        for p in predictors:
            model_name = p.name
            try:
                model_data = self.get_model_data(model_name, db_fix=False)
                if model_data['status'] == 'training' and parse_datetime(
                        model_data['created_at']) < parse_datetime(
                            self.config['mindsdb_last_started_at']):
                    continue

                reduced_model_data = {}

                for k in [
                        'name', 'version', 'is_active', 'predict', 'status',
                        'current_phase', 'accuracy', 'data_source'
                ]:
                    reduced_model_data[k] = model_data.get(k, None)

                for k in ['train_end_at', 'updated_at', 'created_at']:
                    reduced_model_data[k] = model_data.get(k, None)
                    if reduced_model_data[k] is not None:
                        try:
                            reduced_model_data[k] = parse_datetime(
                                str(reduced_model_data[k]).split('.')[0])
                        except Exception as e:
                            # @TODO Does this ever happen
                            print(
                                f'Date parsing exception while parsing: {k} in get_models: ',
                                e)
                            reduced_model_data[k] = parse_datetime(
                                str(reduced_model_data[k]))

                models.append(reduced_model_data)
            except Exception as e:
                print(
                    f"Can't list data for model: '{model_name}' when calling `get_models()`, error: {e}"
                )

        return models

    def delete_model(self, name):
        F.delete_model(name)
        self.dbw.unregister_predictor(name)

    def rename_model(self, name, new_name):
        self.dbw.unregister_predictor(name)
        F.rename_model(name, new_name)
        self.dbw.register_predictors([self.get_model_data(new_name)])

    def load_model(self, fpath):
        name = F.import_model(model_archive_path=fpath)
        self.dbw.register_predictors([self.get_model_data(name)], setup=False)

    def export_model(self, name):
        F.export_predictor(model_name=name)
Code Example #29
File: __main__.py Project: stjordanis/mindsdb
    os.environ['DEFAULT_LOG_LEVEL'] = config['log']['level']['console']
    os.environ['LIGHTWOOD_LOG_LEVEL'] = config['log']['level']['console']

    # Switch to this once the native interface has its own thread :/
    ctx = mp.get_context('spawn')

    from mindsdb.__about__ import __version__ as mindsdb_version
    print(f'Version {mindsdb_version}')

    print(f'Configuration file:\n   {config.config_path}')
    print(f"Storage path:\n   {config['paths']['root']}")

    # @TODO Backwards compatibility for tests, remove later
    from mindsdb.interfaces.database.integrations import DatasourceController
    dbw = DatabaseWrapper(COMPANY_ID)
    model_interface = WithKWArgsWrapper(ModelInterface(),
                                        company_id=COMPANY_ID)
    datasource_interface = WithKWArgsWrapper(DatasourceController(),
                                             company_id=COMPANY_ID)
    raw_model_data_arr = model_interface.get_models()
    model_data_arr = []
    for model in raw_model_data_arr:
        if model['status'] == 'complete':
            try:
                model_data_arr.append(
                    model_interface.get_model_data(model['name']))
            except Exception:
                pass
Code Example #30
class MindsdbNative():
    def __init__(self, config):
        self.config = config
        self.dbw = DatabaseWrapper(self.config)

    def _setup_for_creation(self, name):
        predictor_dir = Path(self.config.paths['predictors']).joinpath(name)
        create_directory(predictor_dir)
        versions_file_path = predictor_dir.joinpath('versions.json')
        with open(str(versions_file_path), 'wt') as f:
            json.dump(self.config.versions, f, indent=4, sort_keys=True)

    def create(self, name):
        self._setup_for_creation(name)
        predictor = mindsdb_native.Predictor(name=name,
                                             run_env={'trigger': 'mindsdb'})
        return predictor

    def learn(self, name, from_data, to_predict, kwargs={}):
        join_learn_process = kwargs.get('join_learn_process', False)
        if 'join_learn_process' in kwargs:
            del kwargs['join_learn_process']

        self._setup_for_creation(name)

        p = PredictorProcess(name, from_data, to_predict, kwargs,
                             self.config.get_all(), 'learn')
        p.start()
        if join_learn_process is True:
            p.join()
            if p.exitcode != 0:
                raise Exception('Learning process failed !')

    def predict(self, name, when_data=None, kwargs={}):
        # @TODO Separate into two paths, one for "normal" predictions and one for "real time" predictions. Use the multiprocessing code commented out below for normal (once we figure out how to return the prediction object... else use the inline code but with the "real time" predict functionality of mindsdb_native that will be implemented later)
        '''
        from_data = when if when is not None else when_data
        p = PredictorProcess(name, from_data, to_predict=None, kwargs=kwargs, config=self.config.get_all(), 'predict')
        p.start()
        predictions = p.join()
        '''
        mdb = mindsdb_native.Predictor(name=name,
                                       run_env={'trigger': 'mindsdb'})

        predictions = mdb.predict(when_data=when_data, **kwargs)

        return predictions

    def analyse_dataset(self, ds):
        return F.analyse_dataset(ds)

    def get_model_data(self, name, native_view=False):
        model = F.get_model_data(name)
        if native_view:
            return model

        data_analysis = model['data_analysis_v2']
        for column in data_analysis['columns']:
            if len(data_analysis[column]) == 0 or data_analysis[column].get(
                    'empty', {}).get('is_empty', False):
                data_analysis[column]['typing'] = {
                    'data_subtype': DATA_SUBTYPES.INT
                }

        return model

    def get_models(self, status='any'):
        models = F.get_models()
        if status != 'any':
            models = [x for x in models if x['status'] == status]
        models = [
            x for x in models
            if x['status'] != 'training' or parse_datetime(x['created_at']) >
            parse_datetime(self.config['mindsdb_last_started_at'])
        ]

        for i in range(len(models)):
            for k in ['train_end_at', 'updated_at', 'created_at']:
                if k in models[i] and models[i][k] is not None:
                    try:
                        models[i][k] = parse_datetime(
                            str(models[i][k]).split('.')[0])
                    except Exception:
                        models[i][k] = parse_datetime(str(models[i][k]))
        return models

    def delete_model(self, name):
        F.delete_model(name)
        self.dbw.unregister_predictor(name)

    def rename_model(self, name, new_name):
        self.dbw.unregister_predictor(name)
        F.rename_model(name, new_name)
        self.dbw.register_predictors([self.get_model_data(new_name)],
                                     setup=False)

    def load_model(self, fpath):
        F.import_model(model_archive_path=fpath)
        # @TODO How do we figure out the name here ?
        # dbw.register_predictors(...)

    def export_model(self, name):
        F.export_predictor(model_name=name)
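
Taken together, the snippets above use DatabaseWrapper through a small, stable surface, even though its constructor argument changes across mindsdb versions (a config object in older code, a company_id in newer code). The stub below is an inferred summary of that surface, a sketch rather than the project's actual implementation:

class DatabaseWrapper:
    '''Inferred interface sketch; real signatures vary between versions.'''

    def __init__(self, config_or_company_id=None):
        # Older snippets pass a config object, newer ones a company_id.
        self.context = config_or_company_id

    def check_connections(self):
        # Returns {integration_name: bool} connection statuses.
        raise NotImplementedError

    def register_predictors(self, model_data_arr, integration_name=None, setup=True):
        # Publishes trained predictors to the connected databases; some
        # versions accept an integration name and/or a setup flag.
        raise NotImplementedError

    def unregister_predictor(self, name):
        # Removes a published predictor.
        raise NotImplementedError

    def setup_integration(self, name):
        # (Re)initializes a single integration's connection.
        raise NotImplementedError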