Example #1
    def get_datasource_obj(self,
                           name=None,
                           id=None,
                           raw=False,
                           company_id=None):
        try:
            if name is not None:
                datasource_record = session.query(Datasource).filter_by(
                    company_id=company_id, name=name).first()
            else:
                datasource_record = session.query(Datasource).filter_by(
                    company_id=company_id, id=id).first()
                # when looked up by id, recover the name so the fs_store
                # key below matches the directory stored for this datasource
                name = datasource_record.name

            self.fs_store.get(
                f'{company_id}@@@@@{name}',
                f'datasource_{company_id}_{datasource_record.id}', self.dir)
            creation_info = json.loads(datasource_record.creation_info)
            if raw:
                return creation_info
            else:
                return eval(creation_info['class'])(*creation_info['args'],
                                                    **creation_info['kwargs'])
        except Exception as e:
            log.error(f'Error getting datasource {name}, exception: {e}')
            return None
Example #2
 def start_analysis(self, name):
     datasource_record = session.query(Datasource).filter_by(
         company_id=self.company_id, name=name).first()
     if datasource_record.analysis is not None:
         return None
     semaphor_record = session.query(Semaphor).filter_by(
         company_id=self.company_id,
         entity_id=datasource_record.id,
         entity_type='datasource').first()
     if semaphor_record is None:
         semaphor_record = Semaphor(company_id=self.company_id,
                                    entity_id=datasource_record.id,
                                    entity_type='datasource',
                                    action='write')
         session.add(semaphor_record)
         session.commit()
     else:
         return
     try:
         analysis = self.mindsdb_native.analyse_dataset(
             self.get_datasource_obj(name, raw=True))
         datasource_record = session.query(Datasource).filter_by(
             company_id=self.company_id, name=name).first()
         datasource_record.analysis = json.dumps(analysis)
         session.commit()
     except Exception as e:
         log.error(e)
     finally:
         semaphor_record = session.query(Semaphor).filter_by(
             company_id=self.company_id,
             entity_id=datasource_record.id,
             entity_type='datasource').first()
         session.delete(semaphor_record)
         session.commit()
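
Example #2 (and #19 below) treats a Semaphor row as a cross-process lock: inserting the row acquires the lock, deleting it releases it, and finding an existing row means someone else holds it. Below is a minimal, self-contained sketch of that idiom; the in-memory engine and the trimmed-down model are illustrative assumptions, not MindsDB's actual schema. Note that check-then-insert is not atomic by itself, so a unique constraint on the entity columns is what would make acquisition race-free under real concurrency:

# Row-as-lock sketch (SQLAlchemy 1.4+); model and engine are illustrative.
from sqlalchemy import create_engine, Column, Integer, String, UniqueConstraint
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Semaphor(Base):
    __tablename__ = 'semaphor'
    __table_args__ = (UniqueConstraint('entity_id', 'entity_type'),)
    id = Column(Integer, primary_key=True)
    entity_id = Column(Integer)
    entity_type = Column(String)
    action = Column(String)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def try_lock(entity_id, entity_type):
    # acquire by inserting the row; report failure if it is already held
    held = session.query(Semaphor).filter_by(
        entity_id=entity_id, entity_type=entity_type).first()
    if held is not None:
        return False
    session.add(Semaphor(entity_id=entity_id, entity_type=entity_type,
                         action='write'))
    session.commit()
    return True

def unlock(entity_id, entity_type):
    # release by deleting the row, as the finally-block above does
    session.query(Semaphor).filter_by(
        entity_id=entity_id, entity_type=entity_type).delete()
    session.commit()

assert try_lock(1, 'datasource') is True
assert try_lock(1, 'datasource') is False  # already held
unlock(1, 'datasource')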
Example #3
 def delete(self, name):
     try:
         session.query(StreamDB).filter_by(company_id=COMPANY_ID,
                                           name=name).delete()
         session.commit()
     except Exception as e:
         log.error(e)
         abort(400, str(e))
     return '', 200
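
Example #3 removes rows with Query.delete(), which emits a single DELETE ... WHERE without loading the matched objects into the session and returns the affected row count; Python-side relationship cascades are skipped by such bulk deletes. A hedged sketch with a stand-in model:

# Bulk-delete sketch; StreamDB here is a stand-in, not MindsDB's schema.
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class StreamDB(Base):
    __tablename__ = 'stream'
    id = Column(Integer, primary_key=True)
    company_id = Column(Integer)
    name = Column(String)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(StreamDB(company_id=1, name='s1'))
session.commit()

# one DELETE statement; matched rows are never loaded as objects
deleted = session.query(StreamDB).filter_by(company_id=1, name='s1').delete()
session.commit()
print(deleted)  # 1 -- a caller could answer 404 instead of 200 when this is 0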
Example #4
 def delete_stream(self, predictor):
     """Deletes stream from database and stops it work by
     setting up a special threading.Event flag."""
     stream_name = f"{self.name}_{predictor}"
     self.log.debug(f"Integration {self.name}: deleting {stream_name}")
     session.query(Stream).filter_by(company_id=self.company_id, integration=self.name, name=stream_name).delete()
     session.commit()
     if stream_name in self.streams:
         self.streams[stream_name].set()
         del self.streams[stream_name]
Example #5
 def get_analysis(self, name, company_id=None):
     dataset_record = session.query(Dataset).filter_by(
         company_id=company_id, name=name).first()
     if dataset_record.analysis_id is None:
         return None
     analysis_record = session.query(Analysis).get(
         dataset_record.analysis_id)
     if analysis_record is None:
         return None
     analysis = json.loads(analysis_record.analysis)
     return analysis
Example #6
def get_db_integration(name,
                       company_id,
                       sensitive_info=True,
                       case_sensitive=False):
    if case_sensitive:
        integration_record = session.query(Integration).filter_by(
            company_id=company_id, name=name).first()
    else:
        integration_record = session.query(Integration).filter(
            (Integration.company_id == company_id)
            & (func.lower(Integration.name) == func.lower(name))).first()
    return _get_integration_record_data(integration_record, sensitive_info)
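
The case-insensitive branch of get_db_integration applies func.lower to both sides, so the database compares LOWER(name) = LOWER(:name). A runnable sketch of that comparison; the Integration model and the data here are illustrative stand-ins:

# Case-insensitive lookup with func.lower, as in get_db_integration.
from sqlalchemy import create_engine, func, Column, Integer, String
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Integration(Base):
    __tablename__ = 'integration'
    id = Column(Integer, primary_key=True)
    company_id = Column(Integer)
    name = Column(String)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(Integration(company_id=1, name='MyPostgres'))
session.commit()

# renders as: WHERE company_id = ? AND lower(name) = lower(?)
record = session.query(Integration).filter(
    (Integration.company_id == 1)
    & (func.lower(Integration.name) == func.lower('mypostgres'))).first()
print(record.name)  # -> 'MyPostgres'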
Example #7
 def get(self, id=None, name=None, company_id=None):
     records = []  # stays empty if neither id nor name is given
     if id is not None:
         records = session.query(View).filter_by(
             id=id, company_id=company_id).all()
     elif name is not None:
         records = session.query(View).filter_by(
             name=name, company_id=company_id).all()
     if len(records) == 0:
         raise Exception(f"Can't find view with name/id: {name}/{id}")
     elif len(records) > 1:
         raise Exception(
             f"There are multiple views with name/id: {name}/{id}")
     record = records[0]
     return self._get_view_record_data(record)
Example #8
 def get_files_names(self, company_id=None):
     """ return list of files names
     """
     return [
         x[0]
         for x in session.query(File.name).filter_by(company_id=company_id)
     ]
Example #9
 def _unlock_predictor(self, id: int) -> None:
     from mindsdb.interfaces.storage.db import session, Semaphor
     semaphor_record = session.query(Semaphor).filter_by(
         entity_id=id, entity_type='predictor').first()
     if semaphor_record is not None:
         session.delete(semaphor_record)
         session.commit()
Example #10
 def make_prediction(self):
     predict_record = session.query(DBPredictor).filter_by(
         company_id=self.company_id, name=self.predictor).first()
     if predict_record is None:
         log.error(
             f"Error creating stream: requested predictor {self.predictor} is not exist"
         )
         return
     while not self.stop_event.wait(0.5):
         try:
             msg_str = next(self.consumer)
             when_data = json.loads(msg_str.value)
             result = self.native_interface.predict(self.predictor,
                                                    self.format_flag,
                                                    when_data=when_data)
             log.error(f"STREAM: got {result}")
             for res in result:
                 in_json = json.dumps({"prediction": res})
                 to_send = in_json.encode('utf-8')
                 log.error(f"sending {to_send}")
                 self.producer.send(self.stream_out_name, to_send)
         except StopIteration:
             pass
     log.error("Stopping stream..")
     self.producer.close()
     self.consumer.close()
     session.close()
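
The consume loop in Examples #10 and #12 uses stop_event.wait(0.5) as both the poll interval and the shutdown signal: wait() returns True as soon as another thread calls set() (which is exactly what delete_stream in Example #4 does), so the loop exits within half a second of the flag being raised. A dependency-free sketch of that control pattern:

# Event.wait(timeout) sleeps at most `timeout` seconds but returns
# immediately (and truthy) once set() is called from another thread,
# which ends the while-loop cleanly.
import threading
import time

def worker(stop_event: threading.Event):
    while not stop_event.wait(0.5):
        print('polling...')          # stands in for next(self.consumer)
    print('Stopping stream..')

stop = threading.Event()
t = threading.Thread(target=worker, args=(stop,))
t.start()
time.sleep(1.2)
stop.set()                           # what delete_stream's .set() does
t.join()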
Example #11
 def get_all(self, company_id=None):
     view_records = session.query(View).filter_by(
         company_id=company_id).all()
     views_dict = {}
     for record in view_records:
         views_dict[record.name] = self._get_view_record_data(record)
     return views_dict
Example #12
    def make_timeseries_predictions(self):
        log.error("STREAM: running 'make_timeseries_predictions'")
        predict_record = session.query(DBPredictor).filter_by(
            company_id=self.company_id, name=self.predictor).first()
        if predict_record is None:
            log.error(
                f"Error creating stream: requested predictor {self.predictor} is not exist"
            )
            return
        self.target = self._get_target()
        self.window = self._get_window_size()
        self.gb = self._get_gb()
        self.dt = self._get_dt()

        while not self.stop_event.wait(0.5):
            try:
                msg_str = next(self.consumer)
                when_data = json.loads(msg_str.value)
                self.to_cache(when_data)
            except StopIteration:
                pass

        log.error("Stopping stream..")
        self.producer.close()
        self.consumer.close()
        session.close()
Example #13
    def save_datasource(self, name, source_type, source, file_path=None, company_id=None):
        if source_type == 'file' and (file_path is None):
            raise Exception('`file_path` argument required when source_type == "file"')

        datasource_record = session.query(Datasource).filter_by(company_id=company_id, name=name).first()
        if datasource_record is not None:
            raise Exception(f'Datasource with name {name} already exists')

        try:
            datasource_record = Datasource(
                company_id=company_id,
                name=name,
                datasources_version=mindsdb_datasources.__version__,
                mindsdb_version=mindsdb_version
            )
            session.add(datasource_record)
            session.commit()

            ds_meta_dir = os.path.join(self.dir, f'{company_id}@@@@@{name}')
            os.mkdir(ds_meta_dir)

            ds, creation_info = self.create_datasource(source_type, source, file_path, company_id, ds_meta_dir)

            if hasattr(ds, 'get_columns') and hasattr(ds, 'get_row_count'):
                try:
                    column_names = ds.get_columns()
                    row_count = ds.get_row_count()
                except Exception:
                    df = ds.df
                    column_names = list(df.keys())
                    row_count = len(df)
            else:
                df = ds.df
                column_names = list(df.keys())
                row_count = len(df)

            if '' in column_names or len(column_names) != len(set(column_names)):
                shutil.rmtree(ds_meta_dir)
                raise Exception('Each column in a datasource must have a unique, non-empty name')

            datasource_record.creation_info = json.dumps(creation_info)
            datasource_record.data = json.dumps({
                'source_type': source_type,
                'source': source,
                'row_count': row_count,
                'columns': [dict(name=x) for x in column_names]
            })

            self.fs_store.put(f'{company_id}@@@@@{name}', f'datasource_{company_id}_{datasource_record.id}', self.dir)
            session.commit()

        except Exception as e:
            log.error(f'Error creating datasource {name}, exception: {e}')
            try:
                self.delete_datasource(name, company_id=company_id)
            except Exception:
                pass
            raise e

        return self.get_datasource_obj(name, raw=True, company_id=company_id)
Example #14
def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = session.query(db.Predictor).filter_by(id=predictor_id).first()
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()
        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()
        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = ModelInterfaceWrapper(ModelInterface(), predictor_record.company_id)
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {'error': f'{traceback.format_exc()}\nMain error: {e}'}
        session.commit()
        raise e
Example #15
def get_logs(min_timestamp, max_timestamp, context, level, log_from, limit):
    logs = session.query(Log).filter(
        Log.company_id == os.environ.get('MINDSDB_COMPANY_ID', None),
        Log.created_at > min_timestamp
    )

    if max_timestamp is not None:
        logs = logs.filter(Log.created_at < max_timestamp)

    if context is not None:
        # e.g. datasource/predictor and associated id
        pass

    if level is not None:
        logs = logs.filter(Log.log_type == level)

    if log_from is not None:
        # mindsdb/native/lightwood/all
        pass

    if limit is not None:
        logs = logs.limit(limit)

    logs = [fmt_log_record(x) for x in logs]
    return logs
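
get_logs builds its query incrementally. A SQLAlchemy Query is composed lazily: each conditional filter() or limit() returns a new query object, and SQL is only emitted when the final list comprehension iterates it. A pared-down sketch of the same shape, with an illustrative Log model:

# Conditional query composition: nothing hits the database until iteration.
from datetime import datetime, timedelta
from sqlalchemy import create_engine, Column, Integer, String, DateTime
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Log(Base):
    __tablename__ = 'log'
    id = Column(Integer, primary_key=True)
    log_type = Column(String)
    created_at = Column(DateTime)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def find_logs(min_timestamp, level=None, limit=None):
    q = session.query(Log).filter(Log.created_at > min_timestamp)
    if level is not None:
        q = q.filter(Log.log_type == level)  # each call returns a new Query
    if limit is not None:
        q = q.limit(limit)
    return list(q)  # the single SELECT runs here

print(find_logs(datetime.now() - timedelta(days=1), level='info', limit=10))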
Example #16
 def get_analysis(self, name, company_id=None):
     datasource_record = session.query(Datasource).filter_by(
         company_id=company_id, name=name).first()
     if datasource_record.analysis is None:
         return None
     analysis = json.loads(datasource_record.analysis)
     return analysis
Example #17
 def get_ai_tables(self):
     ''' get list of ai tables
     '''
     aitable_records = [
         x.__dict__ for x in session.query(AITable).filter_by(
             company_id=self.company_id)
     ]
     return aitable_records
Example #18
 def should_i_exist(self):
     config_record = session.query(Configuration).filter_by(
         company_id=self.company_id).first()
     if config_record is None:
         return False
     integrations = json.loads(config_record.data)["integrations"]
     if self.name not in integrations:
         return False
     return True
Example #19
    def start_analysis(self, name, company_id=None):
        dataset_record = session.query(Dataset).filter_by(
            company_id=company_id, name=name).first()
        if dataset_record.analysis_id is not None:
            return None

        semaphor_record = session.query(Semaphor).filter_by(
            company_id=company_id,
            entity_id=dataset_record.id,
            entity_type='dataset').first()

        if semaphor_record is None:
            semaphor_record = Semaphor(company_id=company_id,
                                       entity_id=dataset_record.id,
                                       entity_type='dataset',
                                       action='write')
            session.add(semaphor_record)
            session.commit()
        else:
            return

        try:
            analysis = self.model_interface.analyse_dataset(
                ds=self.get_datasource_obj(name,
                                           raw=True,
                                           company_id=company_id),
                company_id=company_id)
            dataset_record = session.query(Dataset).filter_by(
                company_id=company_id, name=name).first()
            analysis_record = Analysis(
                analysis=json.dumps(analysis, cls=CustomJSONEncoder))
            session.add(analysis_record)
            session.flush()
            dataset_record.analysis_id = analysis_record.id
            session.commit()
        except Exception as e:
            log.error(e)
        finally:
            semaphor_record = session.query(Semaphor).filter_by(
                company_id=company_id,
                entity_id=dataset_record.id,
                entity_type='dataset').first()
            session.delete(semaphor_record)
            session.commit()
Example #20
def get_db_integrations(company_id, sensitive_info=True):
    integration_records = session.query(Integration).filter_by(
        company_id=company_id).all()
    integration_dict = {}
    for record in integration_records:
        if record is None or record.data is None:
            continue
        integration_dict[record.name] = _get_integration_record_data(
            record, sensitive_info)
    return integration_dict
Example #21
 def get_file_meta(self, name, company_id=None):
     file_record = session.query(File).filter_by(company_id=company_id,
                                                 name=name).first()
     if file_record is None:
         return None
     return {
         'name': file_record.name,
         'columns': file_record.columns,
         'row_count': file_record.row_count
     }
Example #22
def modify_db_integration(name, data, company_id):
    integration_record = session.query(Integration).filter_by(
        company_id=company_id, name=name).first()
    old_data = deepcopy(integration_record.data)
    for k in old_data:
        if k not in data:
            data[k] = old_data[k]

    integration_record.data = data
    session.commit()
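
modify_db_integration merges the old keys into the incoming dict and then reassigns integration_record.data. The reassignment matters: a plain JSON column does not track in-place mutation (that requires sqlalchemy.ext.mutable.MutableDict), so assigning a fresh dict is what marks the attribute dirty for the commit. A self-contained sketch of the merge-then-reassign move (SQLAlchemy 1.4+, illustrative model):

# Merge-then-reassign update of a JSON column; model is illustrative.
from copy import deepcopy
from sqlalchemy import create_engine, Column, Integer, JSON
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Integration(Base):
    __tablename__ = 'integration'
    id = Column(Integer, primary_key=True)
    data = Column(JSON)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(Integration(id=1, data={'host': 'localhost', 'port': 5432}))
session.commit()

record = session.get(Integration, 1)
merged = deepcopy(record.data)
merged.update({'port': 5433})  # keep old keys, overwrite the changed ones
record.data = merged           # reassign so the ORM sees the change
session.commit()
print(session.get(Integration, 1).data)  # {'host': 'localhost', 'port': 5433}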
Example #23
 def get_datasources(self, name=None):
     datasource_arr = []
     if name is not None:
         datasource_record_arr = session.query(Datasource).filter_by(
             company_id=self.company_id, name=name)
     else:
         datasource_record_arr = session.query(Datasource).filter_by(
             company_id=self.company_id)
     for datasource_record in datasource_record_arr:
         try:
             datasource = json.loads(datasource_record.data)
             datasource['created_at'] = datasource_record.created_at
             datasource['updated_at'] = datasource_record.updated_at
             datasource['name'] = datasource_record.name
             datasource['id'] = datasource_record.id
             datasource_arr.append(datasource)
         except Exception as e:
             log.error(e)
     return datasource_arr
Example #24
 def delete_file(self, name, company_id):
     file_record = session.query(File).filter_by(company_id=company_id,
                                                 name=name).first()
     if file_record is None:
         return None
     file_id = file_record.id
     session.delete(file_record)
     session.commit()
     self.fs_store.delete(f'file_{company_id}_{file_id}')
     return True
Example #25
    def save_datasource(self,
                        name,
                        source_type,
                        source=None,
                        file_path=None,
                        company_id=None):
        dataset_record = session.query(Dataset).filter_by(
            company_id=company_id, name=name).first()
        if dataset_record is not None:
            raise Exception(f'Dataset with name {name} already exists')

        if source_type == 'views':
            source_type = 'view_query'
        elif source_type == 'files':
            source_type = 'file'

        try:
            dataset_record = Dataset(
                company_id=company_id,
                name=name,
                datasources_version=mindsdb_datasources.__version__,
                mindsdb_version=mindsdb_version)
            session.add(dataset_record)
            session.commit()

            ds, creation_info = self.create_datasource(source_type, source,
                                                       file_path, company_id)

            ds_meta = self._get_ds_meta(ds)
            column_names = ds_meta['column_names']
            row_count = ds_meta['row_count']

            dataset_record.ds_class = creation_info['class']
            dataset_record.creation_info = json.dumps(creation_info)
            dataset_record.data = json.dumps({
                'source_type': source_type,
                'source': source,
                'row_count': row_count,
                'columns': [dict(name=x) for x in column_names]
            })

            session.commit()

        except Exception as e:
            log.error(f'Error creating dataset {name}, exception: {e}')
            try:
                self.delete_datasource(name, company_id=company_id)
            except Exception:
                pass
            raise e

        return self.get_datasource_obj(name, raw=True, company_id=company_id)
Example #26
    def get_analysis(self, name):
        datasource_record = session.query(Datasource).filter_by(
            company_id=self.company_id, name=name).first()
        if datasource_record.analysis is None:
            datasource_record.analysis = json.dumps(
                self.mindsdb_native.analyse_dataset(
                    self.get_datasource_obj(name)))
            session.commit()

        analysis = json.loads(datasource_record.analysis)
        return analysis
Example #27
    def stop_deleted_streams(self):
        existed_streams = session.query(Stream).filter_by(company_id=self.company_id, integration=self.name)
        actual_streams = [x.name for x in existed_streams]

        for stream in self.streams.copy():
            if stream not in actual_streams:
                # this stream is still running but has been deleted from
                # the database, so it needs to be stopped
                self.log.error(f"INTEGRATION {self.name}: deleting {stream} stream.")
                self.streams[stream].set()
                del self.streams[stream]
Example #28
 def get_datasets(self, name=None, company_id=None):
     dataset_arr = []
     if name is not None:
         dataset_record_arr = session.query(Dataset).filter_by(
             company_id=company_id, name=name)
     else:
         dataset_record_arr = session.query(Dataset).filter_by(
             company_id=company_id)
     for dataset_record in dataset_record_arr:
         try:
             if dataset_record.data is None:
                 continue
             dataset = json.loads(dataset_record.data)
             dataset['created_at'] = dataset_record.created_at
             dataset['updated_at'] = dataset_record.updated_at
             dataset['name'] = dataset_record.name
             dataset['id'] = dataset_record.id
             dataset_arr.append(dataset)
         except Exception as e:
             log.error(e)
     return dataset_arr
Example #29
 def delete_datasource(self, name, company_id=None):
     datasource_record = Datasource.query.filter_by(company_id=company_id,
                                                    name=name).first()
     if not Config()["force_datasource_removing"]:
         linked_models = Predictor.query.filter_by(
             company_id=company_id,
             datasource_id=datasource_record.id).all()
         if linked_models:
             raise Exception(
                 "Can't delete {} datasource because there are next models linked to it: {}"
                 .format(name, [model.name for model in linked_models]))
     session.query(Semaphor).filter_by(company_id=company_id,
                                       entity_id=datasource_record.id,
                                       entity_type='datasource').delete()
     session.delete(datasource_record)
     session.commit()
     self.fs_store.delete(f'datasource_{company_id}_{datasource_record.id}')
     try:
         shutil.rmtree(os.path.join(self.dir, f'{company_id}@@@@@{name}'))
     except Exception:
         pass
Example #30
    def _save(self):
        self._db_config = _null_to_empty(self._db_config)
        config_record = session.query(Configuration).filter_by(
            company_id=self.company_id).first()

        if config_record is not None:
            config_record.data = json.dumps(self._db_config)
        else:
            config_record = Configuration(company_id=self.company_id,
                                          data=json.dumps(self._db_config))
            session.add(config_record)

        session.commit()
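
_save in Example #30 is the read-then-write "upsert" idiom: fetch the singleton row, update it in place when present, insert a fresh one otherwise, then commit either way. A compact sketch of that flow (model and helper name are illustrative); like the Semaphor pattern earlier, it is only concurrency-safe with a unique constraint plus a retry on conflict:

# Select-or-insert sketch matching _save's shape; schema is illustrative.
import json
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Configuration(Base):
    __tablename__ = 'configuration'
    id = Column(Integer, primary_key=True)
    company_id = Column(Integer, unique=True)
    data = Column(String)

engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def save_config(company_id, config: dict):
    record = session.query(Configuration).filter_by(
        company_id=company_id).first()
    if record is not None:
        record.data = json.dumps(config)  # update the existing row in place
    else:
        session.add(Configuration(company_id=company_id,
                                  data=json.dumps(config)))  # first save
    session.commit()

save_config(1, {'integrations': {}})
save_config(1, {'integrations': {'db': {}}})  # second call updates, not inserts
print(session.query(Configuration).count())   # 1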