Example no. 1
 def store_stream(self, stream):
     """Stories a created stream."""
     stream_name = f"{self.name}_{stream.predictor}"
     stream_rec = Stream(name=stream_name,
                         connection_params=self.connection_params,
                         advanced_params=self.advanced_info,
                         _type=stream._type,
                         predictor=stream.predictor,
                         integration=self.name,
                         company_id=self.company_id,
                         stream_in=stream.stream_in_name,
                         stream_out=stream.stream_out_name)
     session.add(stream_rec)
     session.commit()
     self.streams[stream_name] = stream.stop_event
Example no. 2
    def _try_outdate_db_status(self, predictor_record):
        from mindsdb_native import __version__ as native_version
        from mindsdb import __version__ as mindsdb_version
        from mindsdb.interfaces.storage.db import session

        if predictor_record.update_status == 'update_failed':
            return predictor_record

        if predictor_record.native_version != native_version:
            predictor_record.update_status = 'available'
        if predictor_record.mindsdb_version != mindsdb_version:
            predictor_record.update_status = 'available'

        session.commit()
        return predictor_record
Example no. 3
    def emit(self, record):
        log_type = record.levelname
        source = f'file: {record.pathname} - line: {record.lineno}'
        payload = record.msg

        if telemtry_enabled:
            pass
            # @TODO: Enable once we are sure no sensitive info is being output in the logs
            # if log_type in ['INFO']:
            #    add_breadcrumb(
            #        category='auth',
            #        message=str(payload),
            #        level='info',
            #    )
            # Might be too much traffic if we send this for users with slow networks
            #if log_type in ['DEBUG']:
            #    add_breadcrumb(
            #        category='auth',
            #        message=str(payload),
            #        level='debug',
            #    )

        if log_type in ['ERROR', 'WARNING']:
            trace = str(traceback.format_stack(limit=20))
            trac_log = Log(log_type='traceback',
                           source=source,
                           payload=trace,
                           company_id=self.company_id)
            session.add(trac_log)
            session.commit()

            if telemtry_enabled:
                add_breadcrumb(
                    category='stack_trace',
                    message=trace,
                    level='info',
                )
                # both ERROR and WARNING messages are captured
                capture_message(str(payload))

        log = Log(log_type=str(log_type),
                  source=source,
                  payload=str(payload),
                  company_id=self.company_id)
        session.add(log)
        session.commit()
Example no. 4
    def _lock_predictor(self, id, mode='write'):
        from mindsdb.interfaces.storage.db import session, Semaphor

        while True:
            semaphor_record = session.query(Semaphor).filter_by(company_id=self.company_id, entity_id=id, entity_type='predictor').first()
            if semaphor_record is not None:
                if mode == 'read' and semaphor_record.action == 'read':
                    return True
            try:
                semaphor_record = Semaphor(company_id=self.company_id, entity_id=id, entity_type='predictor', action=mode)
                session.add(semaphor_record)
                session.commit()
                return True
            except Exception:
                # another process created the semaphor first; wait and retry
                pass
            time.sleep(1)
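A hedged aside: Example no. 11 below calls a `self._lock_context` helper that is not shown in this listing. A minimal sketch of how such a context manager might pair `_lock_predictor` with the `_unlock_predictor` method from Example no. 29 (the name, signature and placement are assumptions, not the project's actual code):

    @contextmanager
    def _lock_context(self, id, mode='write'):
        # assumes `from contextlib import contextmanager` at module level
        # acquire the semaphor-backed lock, hand control to the caller,
        # and always release it, even if the caller raises
        self._lock_predictor(id, mode)
        try:
            yield
        finally:
            self._unlock_predictor(id)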
Example no. 5
 def store_stream(self, stream):
     """Stories a created stream."""
     stream_name = f"{self.name}_{stream.predictor}"
     stream_rec = Stream(name=stream_name,
                         host=stream.host,
                         port=stream.port,
                         db=stream.db,
                         _type=stream._type,
                         predictor=stream.predictor,
                         integration=self.name,
                         company_id=self.company_id,
                         stream_in=stream.stream_in_name,
                         stream_out=stream.stream_out_name)
     session.add(stream_rec)
     session.commit()
     self.streams[stream_name] = stream.stop_event
Example no. 6
    def run(self):
        '''
        Runs in a subprocess to avoid
        `ValueError: signal only works in main thread`.

        Would this work inside a celery worker?
        '''
        import mindsdb_native

        config = Config()
        fs_store = FsSotre()
        company_id = os.environ.get('MINDSDB_COMPANY_ID', None)
        name, from_data, to_predict, kwargs, datasource_id = self._args

        mdb = mindsdb_native.Predictor(name=name,
                                       run_env={'trigger': 'mindsdb'})

        predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                     name=name).first()
        predictor_record.datasource_id = datasource_id
        predictor_record.to_predict = to_predict
        predictor_record.version = mindsdb_native.__version__
        predictor_record.data = {'name': name, 'status': 'training'}
        #predictor_record.datasource_id = ... <-- can be done once `learn` is passed a datasource name
        session.commit()

        to_predict = to_predict if isinstance(to_predict,
                                              list) else [to_predict]
        data_source = getattr(mindsdb_native,
                              from_data['class'])(*from_data['args'],
                                                  **from_data['kwargs'])
        try:
            mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)
        except Exception as e:
            pass

        fs_store.put(name, f'predictor_{company_id}_{predictor_record.id}',
                     config['paths']['predictors'])

        model_data = mindsdb_native.F.get_model_data(name)

        predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                     name=name).first()
        predictor_record.data = model_data
        session.commit()

        DatabaseWrapper().register_predictors([model_data])
Example no. 7
    def put(self, name):
        params = request.json.get('params')
        if not isinstance(params, dict):
            abort(400, "type of 'params' must be dict")
        for param in [
                "predictor", "stream_in", "stream_out", "integration_name"
        ]:
            if param not in params:
                abort(400, f"'{param}' is missed.")
        integration_name = params['integration_name']
        integration_info = get_integration(integration_name)
        if not integration_info:
            abort(400, f"integration '{integration_name}' doesn't exist.")
        if integration_info["type"] not in ['redis', 'kafka']:
            abort(
                400,
                f"only integrations of type 'redis' or 'kafka' can be used to create streams. Got type: '{integration_info['type']}'"
            )
        connection_params = params.get('connect', {})
        advanced_params = params.get('advanced', {})
        predictor = params['predictor']
        stream_in = params['stream_in']
        stream_out = params['stream_out']
        _type = params.get('type', 'forecast')
        if _type.lower() == StreamTypes.timeseries:
            ts_params = params.get('ts_params')
        else:
            ts_params = {}
        if predictor not in get_predictors():
            abort(
                400,
                f"requested predictor '{predictor}' is not ready or doens't exist"
            )
        stream = StreamDB(_type=_type,
                          name=name,
                          connection_params=connection_params,
                          advanced_params=advanced_params,
                          predictor=predictor,
                          stream_in=stream_in,
                          stream_out=stream_out,
                          integration=integration_name,
                          company_id=COMPANY_ID,
                          ts_params=ts_params)

        session.add(stream)
        session.commit()
        return {"status": "success", "stream_name": name}, 200
Example no. 8
def run_learn_remote(df: DataFrame, predictor_id: int) -> None:
    try:
        serialized_df = json.dumps(df.to_dict())
        predictor_record = Predictor.query.with_for_update().get(predictor_id)
        resp = requests.post(predictor_record.data['train_url'],
                             json={
                                 'df': serialized_df,
                                 'target': predictor_record.to_predict[0]
                             })

        assert resp.status_code == 200
        predictor_record.data['status'] = 'complete'
    except Exception as e:
        predictor_record.data['status'] = 'error'
        # `resp` is undefined if the request itself raised, so fall back to the exception text
        predictor_record.data['error'] = resp.text if 'resp' in locals() else str(e)

    session.commit()
Example no. 9
    def put(self, name):
        params = request.json.get('params')
        if not isinstance(params, dict):
            abort(400, "type of 'params' must be dict")
        for param in [
                "host", "port", "predictor", "stream_in", "stream_out",
                "integration_name"
        ]:
            if param not in params:
                abort(400, f"'{param}' is missed.")
        integration_name = params['integration_name']
        integration_info = get_integration(integration_name)
        if not integration_info:
            abort(400, f"integration '{integration_name}' doesn't exist.")
        if integration_info['type'] != 'redis':
            abort(
                400,
                f"only integrations of type 'redis' can be used to create redis streams. Got type: '{integration_info['type']}'"
            )
        host = integration_info['host']
        port = integration_info['port']
        db = integration_info.get('db', 0)
        predictor = params['predictor']
        stream_in = params['stream_in']
        stream_out = params['stream_out']
        _type = params.get('type', 'forecast')
        if predictor not in get_predictors():
            abort(
                400,
                f"requested predictor '{predictor}' is not ready or doens't exist"
            )
        stream = StreamDB(_type=_type,
                          name=name,
                          host=host,
                          port=port,
                          db=db,
                          predictor=predictor,
                          stream_in=stream_in,
                          stream_out=stream_out,
                          integration=integration_name,
                          company_id=COMPANY_ID)

        session.add(stream)
        session.commit()
        return {"status": "success", "stream_name": name}, 200
Example no. 10
    def save_file(self, name, file_path, file_name=None, company_id=None):
        """ Save the file to our store

            Args:
                name (str): with that name file will be available in sql api
                file_name (str): file name
                file_path (str): path to the file
                company_id (int): company id

            Returns:
                int: id of 'file' record in db
        """
        if file_name is None:
            file_name = Path(file_path).name

        try:
            ds_meta_dir = Path(self.dir).joinpath(f'{company_id}@@@@@{name}')
            ds_meta_dir.mkdir()

            source = ds_meta_dir.joinpath(file_name)
            shutil.move(file_path, str(source))

            ds = FileDS(str(source))
            ds_meta = self._get_ds_meta(ds)

            column_names = ds_meta['column_names']
            if ds_meta['column_names'] is not None:
                column_names = json.dumps(
                    [dict(name=x) for x in ds_meta['column_names']])
            file_record = File(name=name,
                               company_id=company_id,
                               source_file_path=file_name,
                               file_path=str(source),
                               row_count=ds_meta['row_count'],
                               columns=column_names)
            session.add(file_record)
            session.commit()
            self.fs_store.put(f'{company_id}@@@@@{name}',
                              f'file_{company_id}_{file_record.id}', self.dir)
        except Exception as e:
            log.error(e)
            shutil.rmtree(ds_meta_dir)
            raise

        return file_record.id
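A hedged usage sketch for the method above. The owning class name and its construction are assumptions; only the `save_file` signature comes from the snippet:

    file_controller = FileController()          # hypothetical owning class
    record_id = file_controller.save_file(
        name='my_table',                        # name exposed through the SQL API
        file_path='/tmp/data.csv',              # the file is moved into the store, not copied
        company_id=1
    )
    print(f'file record id: {record_id}')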
Example no. 11
    def update_model(self, name):
        from mindsdb_native import F
        from mindsdb_worker.updater.update_model import update_model
        from mindsdb.interfaces.storage.db import session, Predictor
        from mindsdb.interfaces.datastore.datastore import DataStore

        try:
            predictor_record = Predictor.query.filter_by(company_id=self.company_id, name=name, is_custom=False).first()
            predictor_record.update_status = 'updating'
            session.commit()
            update_model(name, self.delete_model, F.delete_model, self.learn, self._lock_context, self.company_id, self.config['paths']['predictors'], predictor_record, self.fs_store, DataStore())

            predictor_record = self._update_db_status(predictor_record)
        except Exception as e:
            log.error(e)
            predictor_record.update_status = 'update_failed'
            session.commit()
            return str(e)
Example no. 12
    def add(self, name, query, integration_name, company_id=None):
        integration_records = session.query(Integration).filter_by(
            company_id=company_id).all()
        integration_id = None
        for record in integration_records:
            if record.name == integration_name:
                integration_id = record.id
                break
        else:
            raise Exception(
                f"Can't find integration with name: {integration_name}")

        view_record = View(name=name,
                           company_id=company_id,
                           query=query,
                           integration_id=integration_id)
        session.add(view_record)
        session.commit()
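A hedged usage sketch: the `view_controller` instance and its construction are assumptions; only the `add` signature and the failure mode (an unknown integration name raises) come from the snippet above:

    view_controller.add(
        name='active_users',
        query='SELECT * FROM users WHERE active = 1',
        integration_name='my_postgres',   # must match an existing Integration record
        company_id=1
    )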
Example no. 13
    def get_model_data(self, name, db_fix=True):
        from mindsdb_native import F
        from mindsdb_native.libs.constants.mindsdb import DATA_SUBTYPES
        from mindsdb.interfaces.storage.db import session, Predictor

        predictor_record = Predictor.query.filter_by(
            company_id=self.company_id, name=name, is_custom=False).first()
        predictor_record = self._try_outdate_db_status(predictor_record)
        model = predictor_record.data
        if model is None or model['status'] == 'training':
            try:
                self.fs_store.get(
                    name, f'predictor_{self.company_id}_{predictor_record.id}',
                    self.config['paths']['predictors'])
                new_model_data = F.get_model_data(name)
            except Exception:
                new_model_data = None

            if predictor_record.data is None or (
                    new_model_data is not None
                    and len(new_model_data) > len(predictor_record.data)):
                predictor_record.data = new_model_data
                model = new_model_data
                session.commit()

        # Make some corrections so that databases don't break when dealing with empty columns
        if db_fix:
            data_analysis = model['data_analysis_v2']
            for column in model['columns']:
                analysis = data_analysis.get(column)
                if isinstance(analysis,
                              dict) and (len(analysis) == 0 or analysis.get(
                                  'empty', {}).get('is_empty', False)):
                    data_analysis[column]['typing'] = {
                        'data_subtype': DATA_SUBTYPES.INT
                    }

        model['created_at'] = str(
            parse_datetime(str(predictor_record.created_at).split('.')[0]))
        model['updated_at'] = str(
            parse_datetime(str(predictor_record.updated_at).split('.')[0]))
        model['predict'] = predictor_record.to_predict
        model['update'] = predictor_record.update_status
        return self._pack(model)
Example no. 14
    def _setup_for_creation(self, name):
        from mindsdb_datasources import FileDS, ClickhouseDS, MariaDS, MySqlDS, PostgresDS, MSSQLDS, MongoDS, SnowflakeDS, AthenaDS
        import mindsdb_native
        from mindsdb_native import F
        from mindsdb_native.libs.constants.mindsdb import DATA_SUBTYPES
        from mindsdb.interfaces.storage.db import session, Predictor


        if name in self.predictor_cache:
            del self.predictor_cache[name]
        # Called here for no particular reason, other than that we want to run this sometimes, but not too often
        self._invalidate_cached_predictors()

        predictor_dir = Path(self.config.paths['predictors']).joinpath(name)
        create_directory(predictor_dir)
        predictor_record = Predictor(company_id=self.company_id, name=name, is_custom=False)

        session.add(predictor_record)
        session.commit()
Example no. 15
    def rename_model(self, name, new_name):
        self.fs_store.get(name, f'custom_model_{self.company_id}_{name}',
                          self.storage_dir)

        self.dbw.unregister_predictor(name)
        shutil.move(self._dir(name), self._dir(new_name))
        shutil.move(os.path.join(self._dir(new_name), f'{name}.py'),
                    os.path.join(self._dir(new_name), f'{new_name}.py'))

        predictor_record = Predictor.query.filter_by(
            company_id=self.company_id, name=name, is_custom=True).first()
        predictor_record.name = new_name
        session.commit()

        self.dbw.register_predictors([self.get_model_data(new_name)])

        self.fs_store.put(name, f'custom_model_{self.company_id}_{new_name}',
                          self.storage_dir)
        self.fs_store.delete(f'custom_model_{self.company_id}_{name}')
Example no. 16
def run_learn(df: DataFrame,
              problem_definition: ProblemDefinition,
              predictor_id: int,
              delete_ds_on_fail: Optional[bool] = False) -> None:
    try:
        run_generate(df, problem_definition, predictor_id)
        run_fit(predictor_id, df)
    except Exception as e:
        predictor_record = Predictor.query.with_for_update().get(predictor_id)
        if delete_ds_on_fail is True:
            linked_db_ds = Datasource.query.filter_by(
                id=predictor_record.datasource_id).first()
            if linked_db_ds is not None:
                predictors_with_ds = Predictor.query.filter(
                    (Predictor.id != predictor_id)
                    & (Predictor.datasource_id == linked_db_ds.id)).all()
                if len(predictors_with_ds) == 0:
                    session.delete(linked_db_ds)
                    predictor_record.datasource_id = None
        predictor_record.data = {"error": str(e)}
        session.commit()
Example no. 17
 def delete_datasource(self, name, company_id=None):
     datasource_record = Datasource.query.filter_by(company_id=company_id,
                                                    name=name).first()
     if not Config()["force_datasource_removing"]:
         linked_models = Predictor.query.filter_by(
             company_id=company_id,
             datasource_id=datasource_record.id).all()
         if linked_models:
             raise Exception(
                 "Can't delete datasource {} because the following models are linked to it: {}"
                 .format(name, [model.name for model in linked_models]))
     session.query(Semaphor).filter_by(company_id=company_id,
                                       entity_id=datasource_record.id,
                                       entity_type='datasource').delete()
     session.delete(datasource_record)
     session.commit()
     self.fs_store.delete(f'datasource_{company_id}_{datasource_record.id}')
     try:
         shutil.rmtree(os.path.join(self.dir, f'{company_id}@@@@@{name}'))
     except Exception:
         pass
Example no. 18
def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = session.query(db.Predictor).filter_by(id=predictor_id).first()
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()
        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = ModelInterfaceWrapper(ModelInterface(), predictor_record.company_id)
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {'error': f'{traceback.format_exc()}\nMain error: {e}'}
        session.commit()
        raise e
Example no. 19
 def start_analysis(self, name):
     datasource_record = session.query(Datasource).filter_by(
         company_id=self.company_id, name=name).first()
     if datasource_record.analysis is not None:
         return None
     semaphor_record = session.query(Semaphor).filter_by(
         company_id=self.company_id,
         entity_id=datasource_record.id,
         entity_type='datasource').first()
     if semaphor_record is None:
         semaphor_record = Semaphor(company_id=self.company_id,
                                    entity_id=datasource_record.id,
                                    entity_type='datasource',
                                    action='write')
         session.add(semaphor_record)
         session.commit()
     else:
         return
     try:
         analysis = self.mindsdb_native.analyse_dataset(
             self.get_datasource_obj(name, raw=True))
         datasource_record = session.query(Datasource).filter_by(
             company_id=self.company_id, name=name).first()
         datasource_record.analysis = json.dumps(analysis)
         session.commit()
     except Exception as e:
         log.error(e)
     finally:
         semaphor_record = session.query(Semaphor).filter_by(
             company_id=self.company_id,
             entity_id=datasource_record.id,
             entity_type='datasource').first()
         session.delete(semaphor_record)
         session.commit()
Example no. 20
    def get_analysis(self, name):
        datasource_record = session.query(Datasource).filter_by(
            company_id=self.company_id, name=name).first()
        if datasource_record.analysis is None:
            try:
                original_process_title = setproctitle.getproctitle()
                setproctitle.setproctitle('mindsdb_native_process')
            except Exception:
                pass

            analysis = self.mindsdb_native.analyse_dataset(
                self.get_datasource_obj(name))
            datasource_record.analysis = json.dumps(analysis)
            session.commit()

            try:
                setproctitle.setproctitle(original_process_title)
            except Exception:
                pass

        analysis = json.loads(datasource_record.analysis)
        return analysis
Example no. 21
def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = Predictor.query.with_for_update().get(predictor_id)
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
            predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()

        # Get the training time for each tried model; this is only possible
        # after training has finished.
        fit_mixers = list(predictor.runtime_log[x]
                          for x in predictor.runtime_log
                          if isinstance(x, tuple) and x[0] == "fit_mixer")
        submodel_data = predictor_record.data.get("submodel_data", [])
        # add the training time to each mixer's info
        if submodel_data and fit_mixers and len(submodel_data) == len(
                fit_mixers):
            for i, tr_time in enumerate(fit_mixers):
                submodel_data[i]["training_time"] = tr_time
        predictor_record.data["submodel_data"] = submodel_data

        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = WithKWArgsWrapper(ModelInterface(),
                               company_id=predictor_record.company_id)
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {
            'error': f'{traceback.format_exc()}\nMain error: {e}'
        }
        session.commit()
        raise e

    try:
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        log.warning(e)
Example no. 22
def run_learn(name, from_data, to_predict, kwargs, datasource_id):
    import mindsdb_native
    import mindsdb_datasources
    import mindsdb

    create_process_mark('learn')

    config = Config()
    fs_store = FsSotre()

    company_id = os.environ.get('MINDSDB_COMPANY_ID', None)

    mdb = mindsdb_native.Predictor(name=name, run_env={'trigger': 'mindsdb'})

    predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                 name=name).first()
    predictor_record.datasource_id = datasource_id
    predictor_record.to_predict = to_predict
    predictor_record.native_version = mindsdb_native.__version__
    predictor_record.mindsdb_version = mindsdb_version
    predictor_record.learn_args = {'to_predict': to_predict, 'kwargs': kwargs}
    predictor_record.data = {'name': name, 'status': 'training'}
    session.commit()

    to_predict = to_predict if isinstance(to_predict, list) else [to_predict]
    data_source = getattr(mindsdb_datasources,
                          from_data['class'])(*from_data['args'],
                                              **from_data['kwargs'])

    try:
        mdb.learn(from_data=data_source, to_predict=to_predict, **kwargs)
    except Exception as e:
        log = logging.getLogger('mindsdb.main')
        log.error(f'Predictor learn error: {e}')
        predictor_record.data = {'name': name, 'status': 'error'}
        session.commit()
        delete_process_mark('learn')
        return

    fs_store.put(name, f'predictor_{company_id}_{predictor_record.id}',
                 config['paths']['predictors'])

    model_data = mindsdb_native.F.get_model_data(name)

    predictor_record = Predictor.query.filter_by(company_id=company_id,
                                                 name=name).first()
    predictor_record.data = model_data
    session.commit()

    DatabaseWrapper().register_predictors([model_data])
    delete_process_mark('learn')
Example no. 23
    def start_analysis(self, name, company_id=None):
        dataset_record = session.query(Dataset).filter_by(
            company_id=company_id, name=name).first()
        if dataset_record.analysis_id is not None:
            return None

        semaphor_record = session.query(Semaphor).filter_by(
            company_id=company_id,
            entity_id=dataset_record.id,
            entity_type='dataset').first()

        if semaphor_record is None:
            semaphor_record = Semaphor(company_id=company_id,
                                       entity_id=dataset_record.id,
                                       entity_type='dataset',
                                       action='write')
            session.add(semaphor_record)
            session.commit()
        else:
            return

        try:
            analysis = self.model_interface.analyse_dataset(
                ds=self.get_datasource_obj(name,
                                           raw=True,
                                           company_id=company_id),
                company_id=company_id)
            dataset_record = session.query(Dataset).filter_by(
                company_id=company_id, name=name).first()
            analysis_record = Analysis(
                analysis=json.dumps(analysis, cls=CustomJSONEncoder))
            session.add(analysis_record)
            session.flush()
            dataset_record.analysis_id = analysis_record.id
            session.commit()
        except Exception as e:
            log.error(e)
        finally:
            semaphor_record = session.query(Semaphor).filter_by(
                company_id=company_id,
                entity_id=dataset_record.id,
                entity_type='dataset').first()
            session.delete(semaphor_record)
            session.commit()
Example no. 24
    def save_datasource(self, name, source_type, source, file_path=None):
        datasource_record = Datasource(company_id=self.company_id, name=name)

        if source_type == 'file' and (file_path is None):
            raise Exception(
                '`file_path` argument required when source_type == "file"')

        ds_meta_dir = os.path.join(self.dir, name)
        os.mkdir(ds_meta_dir)

        session.add(datasource_record)
        session.commit()
        datasource_record = session.query(Datasource).filter_by(
            company_id=self.company_id, name=name).first()

        try:
            if source_type == 'file':
                source = os.path.join(ds_meta_dir, source)
                shutil.move(file_path, source)
                ds = FileDS(source)

                creation_info = {
                    'class': 'FileDS',
                    'args': [source],
                    'kwargs': {}
                }

            elif source_type in self.config['integrations']:
                integration = self.config['integrations'][source_type]

                ds_class_map = {
                    'clickhouse': ClickhouseDS,
                    'mariadb': MariaDS,
                    'mysql': MySqlDS,
                    'postgres': PostgresDS,
                    'mssql': MSSQLDS,
                    'mongodb': MongoDS,
                    'snowflake': SnowflakeDS
                }

                try:
                    dsClass = ds_class_map[integration['type']]
                except KeyError:
                    raise KeyError(
                        f"Unknown DS type: {source_type}, type is {integration['type']}"
                    )

                if integration['type'] in ['clickhouse']:
                    creation_info = {
                        'class': dsClass.__name__,
                        'args': [],
                        'kwargs': {
                            'query': source['query'],
                            'user': integration['user'],
                            'password': integration['password'],
                            'host': integration['host'],
                            'port': integration['port']
                        }
                    }
                    ds = dsClass(**creation_info['kwargs'])

                elif integration['type'] in [
                        'mssql', 'postgres', 'mariadb', 'mysql'
                ]:
                    creation_info = {
                        'class': dsClass.__name__,
                        'args': [],
                        'kwargs': {
                            'query': source['query'],
                            'user': integration['user'],
                            'password': integration['password'],
                            'host': integration['host'],
                            'port': integration['port']
                        }
                    }

                    if 'database' in integration:
                        creation_info['kwargs']['database'] = integration[
                            'database']

                    if 'database' in source:
                        creation_info['kwargs']['database'] = source[
                            'database']

                    ds = dsClass(**creation_info['kwargs'])

                elif integration['type'] == 'snowflake':
                    creation_info = {
                        'class': dsClass.__name__,
                        'args': [],
                        'kwargs': {
                            'query': source['query'],
                            'schema': source['schema'],
                            'warehouse': source['warehouse'],
                            'database': source['database'],
                            'host': integration['host'],
                            'password': integration['password'],
                            'user': integration['user'],
                            'account': integration['account']
                        }
                    }

                    ds = dsClass(**creation_info['kwargs'])

                elif integration['type'] == 'mongodb':
                    if isinstance(source['find'], str):
                        source['find'] = json.loads(source['find'])
                    creation_info = {
                        'class': dsClass.__name__,
                        'args': [],
                        'kwargs': {
                            'database': source['database'],
                            'collection': source['collection'],
                            'query': source['find'],
                            'user': integration['user'],
                            'password': integration['password'],
                            'host': integration['host'],
                            'port': integration['port']
                        }
                    }

                    ds = dsClass(**creation_info['kwargs'])
            else:
                # This probably only happens for urls
                ds = FileDS(source)
                creation_info = {
                    'class': 'FileDS',
                    'args': [source],
                    'kwargs': {}
                }

            df = ds.df

            if '' in df.columns or len(df.columns) != len(set(df.columns)):
                shutil.rmtree(ds_meta_dir)
                raise Exception(
                    'Each column in a datasource must have a unique, non-empty name'
                )

            datasource_record.creation_info = json.dumps(creation_info)
            datasource_record.data = json.dumps({
                'source_type': source_type,
                'source': source,
                'row_count': len(df),
                'columns': [dict(name=x) for x in list(df.keys())]
            })

            self.fs_store.put(
                name, f'datasource_{self.company_id}_{datasource_record.id}',
                self.dir)

        except Exception:
            if os.path.isdir(ds_meta_dir):
                shutil.rmtree(ds_meta_dir)
            raise

        session.commit()
        return self.get_datasource_obj(name, raw=True), name
Example no. 25
 def delete_all_streams(self):
     for stream in self.streams.copy():
         self.streams[stream].set()
         del self.streams[stream]
     session.query(Stream).filter_by(company_id=self.company_id, integration=self.name).delete()
     session.commit()
Example no. 26
def run_update(name: str, company_id: int):
    original_name = name
    name = f'{company_id}@@@@@{name}'

    fs_store = FsStore()
    config = Config()
    data_store = WithKWArgsWrapper(DataStore(), company_id=company_id)

    try:
        predictor_record = Predictor.query.filter_by(
            company_id=company_id, name=original_name).first()
        assert predictor_record is not None

        predictor_record.update_status = 'updating'

        session.commit()
        ds = data_store.get_datasource_obj(None,
                                           raw=False,
                                           id=predictor_record.datasource_id)
        df = ds.df

        problem_definition = predictor_record.learn_args

        problem_definition['target'] = predictor_record.to_predict[0]

        if 'join_learn_process' in problem_definition:
            del problem_definition['join_learn_process']

        # Adapt kwargs to problem definition
        if 'timeseries_settings' in problem_definition:
            problem_definition['timeseries_settings'] = problem_definition[
                'timeseries_settings']

        if 'stop_training_in_x_seconds' in problem_definition:
            problem_definition['time_aim'] = problem_definition[
                'stop_training_in_x_seconds']

        json_ai = lightwood.json_ai_from_problem(df, problem_definition)
        predictor_record.json_ai = json_ai.to_dict()
        predictor_record.code = lightwood.code_from_json_ai(json_ai)
        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
            predictor_record.code)
        predictor.learn(df)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)
        fs_store.put(fs_name, fs_name, config['paths']['predictors'])
        predictor_record.data = predictor.model_analysis.to_dict(
        )  # type: ignore
        session.commit()

        predictor_record.lightwood_version = lightwood_version
        predictor_record.mindsdb_version = mindsdb_version
        predictor_record.update_status = 'up_to_date'
        session.commit()

    except Exception as e:
        log.error(e)
        predictor_record.update_status = 'update_failed'  # type: ignore
        session.commit()
        return str(e)
Example no. 27
    def save_datasource(self,
                        name,
                        source_type,
                        source,
                        file_path=None,
                        company_id=None):
        if source_type == 'file' and (file_path is None):
            raise Exception(
                '`file_path` argument required when source_type == "file"')

        datasource_record = session.query(Datasource).filter_by(
            company_id=company_id, name=name).first()
        if datasource_record is not None:
            raise Exception(f'Datasource with name {name} already exists')

        try:
            datasource_record = Datasource(
                company_id=company_id,
                name=name,
                datasources_version=mindsdb_datasources.__version__,
                mindsdb_version=mindsdb_version)
            session.add(datasource_record)
            session.commit()

            ds_meta_dir = os.path.join(self.dir, f'{company_id}@@@@@{name}')
            os.mkdir(ds_meta_dir)

            ds, creation_info = self.create_datasource(source_type, source,
                                                       file_path, company_id,
                                                       ds_meta_dir)

            if hasattr(ds, 'get_columns') and hasattr(ds, 'get_row_count'):
                try:
                    column_names = ds.get_columns()
                    row_count = ds.get_row_count()
                except Exception:
                    df = ds.df
                    column_names = list(df.keys())
                    row_count = len(df)
            else:
                df = ds.df
                column_names = list(df.keys())
                row_count = len(df)

            if '' in column_names or len(column_names) != len(
                    set(column_names)):
                shutil.rmtree(ds_meta_dir)
                raise Exception(
                    'Each column in a datasource must have a unique, non-empty name'
                )

            datasource_record.creation_info = json.dumps(creation_info)
            datasource_record.data = json.dumps({
                'source_type': source_type,
                'source': source,
                'row_count': row_count,
                'columns': [dict(name=x) for x in column_names]
            })

            self.fs_store.put(
                f'{company_id}@@@@@{name}',
                f'datasource_{company_id}_{datasource_record.id}', self.dir)
            session.commit()

        except Exception as e:
            log.error(f'Error creating datasource {name}, exception: {e}')
            try:
                self.delete_datasource(name, company_id=company_id)
            except Exception:
                pass
            raise e

        return self.get_datasource_obj(name, raw=True, company_id=company_id)
Example no. 28
def add_db_integration(name, data, company_id):
    if 'database_name' not in data:
        data['database_name'] = name
    if 'publish' not in data:
        data['publish'] = True

    bundle_path = data.get('secure_connect_bundle')
    if data.get('type') in ('cassandra',
                            'scylla') and _is_not_empty_str(bundle_path):
        if os.path.isfile(bundle_path) is False:
            raise Exception(f'Cannot access file: {bundle_path}')
        integrations_dir = Config()['paths']['integrations']

        p = Path(bundle_path)
        data['secure_connect_bundle'] = p.name

        integration_record = Integration(name=name,
                                         data=data,
                                         company_id=company_id)
        session.add(integration_record)
        session.commit()
        integration_id = integration_record.id

        folder_name = f'integration_files_{company_id}_{integration_id}'
        integration_dir = os.path.join(integrations_dir, folder_name)
        create_directory(integration_dir)
        shutil.copyfile(bundle_path, os.path.join(integration_dir, p.name))

        FsStore().put(folder_name, integration_dir, integrations_dir)
    elif data.get('type') in ('mysql', 'mariadb'):
        ssl = data.get('ssl')
        files = {}
        temp_dir = None
        if ssl is True:
            for key in ['ssl_ca', 'ssl_cert', 'ssl_key']:
                if key not in data:
                    continue
                if os.path.isfile(data[key]) is False:
                    if _is_not_empty_str(data[key]) is False:
                        raise Exception(
                            "'ssl_ca', 'ssl_cert' and 'ssl_key' must be paths or inline certs"
                        )
                    if temp_dir is None:
                        temp_dir = tempfile.mkdtemp(
                            prefix='integration_files_')
                    cert_file_name = data.get(f'{key}_name', f'{key}.pem')
                    cert_file_path = os.path.join(temp_dir, cert_file_name)
                    with open(cert_file_path, 'wt') as f:
                        f.write(data[key])
                    data[key] = cert_file_path
                files[key] = data[key]
                p = Path(data[key])
                data[key] = p.name
        integration_record = Integration(name=name,
                                         data=data,
                                         company_id=company_id)
        session.add(integration_record)
        session.commit()
        integration_id = integration_record.id

        if len(files) > 0:
            integrations_dir = Config()['paths']['integrations']
            folder_name = f'integration_files_{company_id}_{integration_id}'
            integration_dir = os.path.join(integrations_dir, folder_name)
            create_directory(integration_dir)
            for file_path in files.values():
                p = Path(file_path)
                shutil.copyfile(file_path,
                                os.path.join(integration_dir, p.name))
            FsStore().put(folder_name, integration_dir, integrations_dir)
    else:
        integration_record = Integration(name=name,
                                         data=data,
                                         company_id=company_id)
        session.add(integration_record)
        session.commit()
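A hedged usage sketch for the MySQL/MariaDB branch above. The connection values are placeholders; only the keys the function inspects ('type', 'ssl', 'ssl_ca', 'secure_connect_bundle', ...) come from the code:

    add_db_integration(
        name='my_mysql',
        data={
            'type': 'mysql',
            'host': '127.0.0.1',
            'port': 3306,
            'user': 'root',
            'password': 'secret',
            'ssl': True,
            # inline cert: it is written to a temp file and stored alongside the integration
            'ssl_ca': '-----BEGIN CERTIFICATE-----\n...\n-----END CERTIFICATE-----'
        },
        company_id=1
    )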
Example no. 29
 def _unlock_predictor(self, id):
     from mindsdb.interfaces.storage.db import session, Semaphor
     semaphor_record = session.query(Semaphor).filter_by(company_id=self.company_id, entity_id=id, entity_type='predictor').first()
     if semaphor_record is not None:
         session.delete(semaphor_record)
         session.commit()