def get_datasource_obj(self, name=None, id=None, raw=False, company_id=None):
    try:
        if name is not None:
            datasource_record = session.query(Datasource).filter_by(
                company_id=company_id, name=name).first()
        else:
            datasource_record = session.query(Datasource).filter_by(
                company_id=company_id, id=id).first()

        self.fs_store.get(
            f'{company_id}@@@@@{name}',
            f'datasource_{company_id}_{datasource_record.id}',
            self.dir
        )
        creation_info = json.loads(datasource_record.creation_info)
        if raw:
            return creation_info
        else:
            return eval(creation_info['class'])(*creation_info['args'], **creation_info['kwargs'])
    except Exception as e:
        log.error(f'Error getting datasource {name}, exception: {e}')
        return None

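# Hedged usage sketch (caller side; the controller instance name and the
# company id value below are assumptions, not from the source):
#
#     creation_info = datasource_controller.get_datasource_obj('my_ds', raw=True, company_id=1)
#     ds = datasource_controller.get_datasource_obj('my_ds', company_id=1)
#     if ds is not None:
#         df = ds.df  # datasource objects expose their data as a DataFrame (see save_datasource below)
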
def start_analysis(self, name):
    datasource_record = session.query(Datasource).filter_by(
        company_id=self.company_id, name=name).first()
    if datasource_record.analysis is not None:
        return None

    semaphor_record = session.query(Semaphor).filter_by(
        company_id=self.company_id,
        entity_id=datasource_record.id,
        entity_type='datasource'
    ).first()
    if semaphor_record is None:
        semaphor_record = Semaphor(
            company_id=self.company_id,
            entity_id=datasource_record.id,
            entity_type='datasource',
            action='write'
        )
        session.add(semaphor_record)
        session.commit()
    else:
        return

    try:
        analysis = self.mindsdb_native.analyse_dataset(
            self.get_datasource_obj(name, raw=True))
        datasource_record = session.query(Datasource).filter_by(
            company_id=self.company_id, name=name).first()
        datasource_record.analysis = json.dumps(analysis)
        session.commit()
    except Exception as e:
        log.error(e)
    finally:
        semaphor_record = session.query(Semaphor).filter_by(
            company_id=self.company_id,
            entity_id=datasource_record.id,
            entity_type='datasource'
        ).first()
        session.delete(semaphor_record)
        session.commit()

def delete(self, name):
    try:
        session.query(StreamDB).filter_by(
            company_id=COMPANY_ID, name=name).delete()
        session.commit()
    except Exception as e:
        log.error(e)
        abort(400, str(e))
    return '', 200

def delete_stream(self, predictor):
    """Delete the stream from the database and stop its work by setting
    a special threading.Event flag."""
    stream_name = f"{self.name}_{predictor}"
    self.log.debug(f"Integration {self.name}: deleting {stream_name}")
    session.query(Stream).filter_by(
        company_id=self.company_id,
        integration=self.name,
        name=stream_name
    ).delete()
    session.commit()
    if stream_name in self.streams:
        self.streams[stream_name].set()
        del self.streams[stream_name]

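# Note (added, hedged): the Event stored in self.streams[stream_name] is assumed to be
# the same stop flag a stream worker polls, e.g. `while not self.stop_event.wait(0.5)`
# in make_prediction below, so calling .set() makes that loop exit on its next check.
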
def get_analysis(self, name, company_id=None):
    dataset_record = session.query(Dataset).filter_by(
        company_id=company_id, name=name).first()
    if dataset_record.analysis_id is None:
        return None
    analysis_record = session.query(Analysis).get(dataset_record.analysis_id)
    if analysis_record is None:
        return None
    analysis = json.loads(analysis_record.analysis)
    return analysis

def get_db_integration(name, company_id, sensitive_info=True, case_sensitive=False):
    if case_sensitive:
        integration_record = session.query(Integration).filter_by(
            company_id=company_id, name=name).first()
    else:
        integration_record = session.query(Integration).filter(
            (Integration.company_id == company_id)
            & (func.lower(Integration.name) == func.lower(name))
        ).first()
    return _get_integration_record_data(integration_record, sensitive_info)

def get(self, id=None, name=None, company_id=None):
    if id is not None:
        records = session.query(View).filter_by(
            id=id, company_id=company_id).all()
    elif name is not None:
        records = session.query(View).filter_by(
            name=name, company_id=company_id).all()
    if len(records) == 0:
        raise Exception(f"Can't find view with name/id: {name}/{id}")
    elif len(records) > 1:
        raise Exception(f"There are multiple views with name/id: {name}/{id}")
    record = records[0]
    return self._get_view_record_data(record)

def get_files_names(self, company_id=None):
    """Return a list of file names."""
    return [
        x[0] for x in session.query(File.name).filter_by(company_id=company_id)
    ]

def _unlock_predictor(self, id: int) -> None:
    from mindsdb.interfaces.storage.db import session, Semaphor
    semaphor_record = session.query(Semaphor).filter_by(
        entity_id=id, entity_type='predictor').first()
    if semaphor_record is not None:
        session.delete(semaphor_record)
        session.commit()

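# Hedged sketch (hypothetical, not from the source): a locking counterpart that follows
# the same Semaphor pattern used in start_analysis above. The name `_lock_predictor`
# and the 'write' action value are assumptions.
#
#     def _lock_predictor(self, id: int, action: str = 'write') -> None:
#         from mindsdb.interfaces.storage.db import session, Semaphor
#         semaphor_record = session.query(Semaphor).filter_by(
#             entity_id=id, entity_type='predictor').first()
#         if semaphor_record is None:
#             session.add(Semaphor(entity_id=id, entity_type='predictor', action=action))
#             session.commit()
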
def make_prediction(self):
    predict_record = session.query(DBPredictor).filter_by(
        company_id=self.company_id, name=self.predictor).first()
    if predict_record is None:
        log.error(
            f"Error creating stream: requested predictor {self.predictor} does not exist"
        )
        return
    while not self.stop_event.wait(0.5):
        try:
            msg_str = next(self.consumer)
            when_data = json.loads(msg_str.value)
            result = self.native_interface.predict(
                self.predictor, self.format_flag, when_data=when_data)
            log.error(f"STREAM: got {result}")
            for res in result:
                in_json = json.dumps({"prediction": res})
                to_send = in_json.encode('utf-8')
                log.error(f"sending {to_send}")
                self.producer.send(self.stream_out_name, to_send)
        except StopIteration:
            pass
    log.error("Stopping stream..")
    self.producer.close()
    self.consumer.close()
    session.close()

def get_all(self, company_id=None):
    view_records = session.query(View).filter_by(company_id=company_id).all()
    views_dict = {}
    for record in view_records:
        views_dict[record.name] = self._get_view_record_data(record)
    return views_dict

def make_timeseries_predictions(self):
    log.error("STREAM: running 'make_timeseries_predictions'")
    predict_record = session.query(DBPredictor).filter_by(
        company_id=self.company_id, name=self.predictor).first()
    if predict_record is None:
        log.error(
            f"Error creating stream: requested predictor {self.predictor} does not exist"
        )
        return
    self.target = self._get_target()
    self.window = self._get_window_size()
    self.gb = self._get_gb()
    self.dt = self._get_dt()
    while not self.stop_event.wait(0.5):
        try:
            msg_str = next(self.consumer)
            when_data = json.loads(msg_str.value)
            self.to_cache(when_data)
        except StopIteration:
            pass
    log.error("Stopping stream..")
    self.producer.close()
    self.consumer.close()
    session.close()

def save_datasource(self, name, source_type, source, file_path=None, company_id=None):
    if source_type == 'file' and (file_path is None):
        raise Exception('`file_path` argument required when source_type == "file"')

    datasource_record = session.query(Datasource).filter_by(
        company_id=company_id, name=name).first()
    if datasource_record is not None:
        raise Exception(f'Datasource with name {name} already exists')

    try:
        datasource_record = Datasource(
            company_id=company_id,
            name=name,
            datasources_version=mindsdb_datasources.__version__,
            mindsdb_version=mindsdb_version
        )
        session.add(datasource_record)
        session.commit()

        ds_meta_dir = os.path.join(self.dir, f'{company_id}@@@@@{name}')
        os.mkdir(ds_meta_dir)

        ds, creation_info = self.create_datasource(
            source_type, source, file_path, company_id, ds_meta_dir)

        if hasattr(ds, 'get_columns') and hasattr(ds, 'get_row_count'):
            try:
                column_names = ds.get_columns()
                row_count = ds.get_row_count()
            except Exception:
                df = ds.df
                column_names = list(df.keys())
                row_count = len(df)
        else:
            df = ds.df
            column_names = list(df.keys())
            row_count = len(df)

        if '' in column_names or len(column_names) != len(set(column_names)):
            shutil.rmtree(ds_meta_dir)
            raise Exception('Each column in datasource must have unique non-empty name')

        datasource_record.creation_info = json.dumps(creation_info)
        datasource_record.data = json.dumps({
            'source_type': source_type,
            'source': source,
            'row_count': row_count,
            'columns': [dict(name=x) for x in column_names]
        })

        self.fs_store.put(
            f'{company_id}@@@@@{name}',
            f'datasource_{company_id}_{datasource_record.id}',
            self.dir
        )
        session.commit()
    except Exception as e:
        log.error(f'Error creating datasource {name}, exception: {e}')
        try:
            self.delete_datasource(name, company_id=company_id)
        except Exception:
            pass
        raise e

    return self.get_datasource_obj(name, raw=True, company_id=company_id)

def run_fit(predictor_id: int, df: pd.DataFrame) -> None:
    try:
        predictor_record = session.query(db.Predictor).filter_by(id=predictor_id).first()
        assert predictor_record is not None

        fs_store = FsStore()
        config = Config()

        predictor_record.data = {'training_log': 'training'}
        session.commit()

        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
        predictor.learn(df)

        session.refresh(predictor_record)

        fs_name = f'predictor_{predictor_record.company_id}_{predictor_record.id}'
        pickle_path = os.path.join(config['paths']['predictors'], fs_name)
        predictor.save(pickle_path)

        fs_store.put(fs_name, fs_name, config['paths']['predictors'])

        predictor_record.data = predictor.model_analysis.to_dict()
        predictor_record.dtype_dict = predictor.dtype_dict
        session.commit()

        dbw = DatabaseWrapper(predictor_record.company_id)
        mi = ModelInterfaceWrapper(ModelInterface(), predictor_record.company_id)
        dbw.register_predictors([mi.get_model_data(predictor_record.name)])
    except Exception as e:
        session.refresh(predictor_record)
        predictor_record.data = {'error': f'{traceback.format_exc()}\nMain error: {e}'}
        session.commit()
        raise e

def get_logs(min_timestamp, max_timestamp, context, level, log_from, limit):
    logs = session.query(Log).filter(
        Log.company_id == os.environ.get('MINDSDB_COMPANY_ID', None),
        Log.created_at > min_timestamp
    )

    if max_timestamp is not None:
        logs = logs.filter(Log.created_at < max_timestamp)

    if context is not None:
        # e.g. datasource/predictor and associated id
        pass

    if level is not None:
        logs = logs.filter(Log.log_type == level)

    if log_from is not None:
        # mindsdb/native/lightwood/all
        pass

    if limit is not None:
        logs = logs.limit(limit)

    logs = [fmt_log_record(x) for x in logs]
    return logs

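# Hedged usage sketch (argument values are assumptions, not from the source):
#
#     from datetime import datetime, timedelta
#     recent_errors = get_logs(
#         min_timestamp=datetime.utcnow() - timedelta(hours=1),
#         max_timestamp=None, context=None, level='error',
#         log_from=None, limit=100)
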
def get_analysis(self, name, company_id=None):
    datasource_record = session.query(Datasource).filter_by(
        company_id=company_id, name=name).first()
    if datasource_record.analysis is None:
        return None
    analysis = json.loads(datasource_record.analysis)
    return analysis

def get_ai_tables(self):
    '''Get the list of AI tables.'''
    aitable_records = [
        x.__dict__ for x in session.query(AITable).filter_by(
            company_id=self.company_id)
    ]
    return aitable_records

def should_i_exist(self):
    config_record = session.query(Configuration).filter_by(
        company_id=self.company_id).first()
    if config_record is None:
        return False
    integrations = json.loads(config_record.data)["integrations"]
    if self.name not in integrations:
        return False
    return True

def start_analysis(self, name, company_id=None):
    dataset_record = session.query(Dataset).filter_by(
        company_id=company_id, name=name).first()
    if dataset_record.analysis_id is not None:
        return None

    semaphor_record = session.query(Semaphor).filter_by(
        company_id=company_id,
        entity_id=dataset_record.id,
        entity_type='dataset'
    ).first()
    if semaphor_record is None:
        semaphor_record = Semaphor(
            company_id=company_id,
            entity_id=dataset_record.id,
            entity_type='dataset',
            action='write'
        )
        session.add(semaphor_record)
        session.commit()
    else:
        return

    try:
        analysis = self.model_interface.analyse_dataset(
            ds=self.get_datasource_obj(name, raw=True, company_id=company_id),
            company_id=company_id
        )
        dataset_record = session.query(Dataset).filter_by(
            company_id=company_id, name=name).first()
        analysis_record = Analysis(
            analysis=json.dumps(analysis, cls=CustomJSONEncoder))
        session.add(analysis_record)
        session.flush()
        dataset_record.analysis_id = analysis_record.id
        session.commit()
    except Exception as e:
        log.error(e)
    finally:
        semaphor_record = session.query(Semaphor).filter_by(
            company_id=company_id,
            entity_id=dataset_record.id,
            entity_type='dataset'
        ).first()
        session.delete(semaphor_record)
        session.commit()

def get_db_integrations(company_id, sensitive_info=True):
    integration_records = session.query(Integration).filter_by(
        company_id=company_id).all()
    integration_dict = {}
    for record in integration_records:
        if record is None or record.data is None:
            continue
        integration_dict[record.name] = _get_integration_record_data(
            record, sensitive_info)
    return integration_dict

def get_file_meta(self, name, company_id=None):
    file_record = session.query(File).filter_by(
        company_id=company_id, name=name).first()
    if file_record is None:
        return None
    return {
        'name': file_record.name,
        'columns': file_record.columns,
        'row_count': file_record.row_count
    }

def modify_db_integration(name, data, company_id):
    integration_record = session.query(Integration).filter_by(
        company_id=company_id, name=name).first()
    old_data = deepcopy(integration_record.data)
    for k in old_data:
        if k not in data:
            data[k] = old_data[k]
    integration_record.data = data
    session.commit()

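# Worked example (added for clarity; the values are illustrative): keys missing from
# the new `data` dict are carried over from the stored record, so with
#     old_data = {'host': 'localhost', 'port': 5432}
#     data     = {'port': 5433}
# the record ends up with {'port': 5433, 'host': 'localhost'}.
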
def get_datasources(self, name=None):
    datasource_arr = []
    if name is not None:
        datasource_record_arr = session.query(Datasource).filter_by(
            company_id=self.company_id, name=name)
    else:
        datasource_record_arr = session.query(Datasource).filter_by(
            company_id=self.company_id)
    for datasource_record in datasource_record_arr:
        try:
            datasource = json.loads(datasource_record.data)
            datasource['created_at'] = datasource_record.created_at
            datasource['updated_at'] = datasource_record.updated_at
            datasource['name'] = datasource_record.name
            datasource['id'] = datasource_record.id
            datasource_arr.append(datasource)
        except Exception as e:
            log.error(e)
    return datasource_arr

def delete_file(self, name, company_id):
    file_record = session.query(File).filter_by(
        company_id=company_id, name=name).first()
    if file_record is None:
        return None
    file_id = file_record.id
    session.delete(file_record)
    session.commit()
    self.fs_store.delete(f'file_{company_id}_{file_id}')
    return True

def save_datasource(self, name, source_type, source=None, file_path=None, company_id=None):
    dataset_record = session.query(Dataset).filter_by(
        company_id=company_id, name=name).first()
    if dataset_record is not None:
        raise Exception(f'Dataset with name {name} already exists')

    if source_type == 'views':
        source_type = 'view_query'
    elif source_type == 'files':
        source_type = 'file'

    try:
        dataset_record = Dataset(
            company_id=company_id,
            name=name,
            datasources_version=mindsdb_datasources.__version__,
            mindsdb_version=mindsdb_version
        )
        session.add(dataset_record)
        session.commit()

        ds, creation_info = self.create_datasource(source_type, source, file_path, company_id)
        ds_meta = self._get_ds_meta(ds)
        column_names = ds_meta['column_names']
        row_count = ds_meta['row_count']

        dataset_record.ds_class = creation_info['class']
        dataset_record.creation_info = json.dumps(creation_info)
        dataset_record.data = json.dumps({
            'source_type': source_type,
            'source': source,
            'row_count': row_count,
            'columns': [dict(name=x) for x in column_names]
        })
        session.commit()
    except Exception as e:
        log.error(f'Error creating dataset {name}, exception: {e}')
        try:
            self.delete_datasource(name, company_id=company_id)
        except Exception:
            pass
        raise e

    return self.get_datasource_obj(name, raw=True, company_id=company_id)

def get_analysis(self, name):
    datasource_record = session.query(Datasource).filter_by(
        company_id=self.company_id, name=name).first()
    if datasource_record.analysis is None:
        datasource_record.analysis = json.dumps(
            self.mindsdb_native.analyse_dataset(self.get_datasource_obj(name)))
        session.commit()
    analysis = json.loads(datasource_record.analysis)
    return analysis

def stop_deleted_streams(self):
    existed_streams = session.query(Stream).filter_by(
        company_id=self.company_id, integration=self.name)
    actual_streams = [x.name for x in existed_streams]

    for stream in self.streams.copy():
        if stream not in actual_streams:
            # This stream is still running but has been deleted from the
            # database, so it needs to be stopped.
            self.log.error(f"INTEGRATION {self.name}: deleting {stream} stream.")
            self.streams[stream].set()
            del self.streams[stream]

def get_datasets(self, name=None, company_id=None):
    dataset_arr = []
    if name is not None:
        dataset_record_arr = session.query(Dataset).filter_by(
            company_id=company_id, name=name)
    else:
        dataset_record_arr = session.query(Dataset).filter_by(
            company_id=company_id)
    for dataset_record in dataset_record_arr:
        try:
            if dataset_record.data is None:
                continue
            dataset = json.loads(dataset_record.data)
            dataset['created_at'] = dataset_record.created_at
            dataset['updated_at'] = dataset_record.updated_at
            dataset['name'] = dataset_record.name
            dataset['id'] = dataset_record.id
            dataset_arr.append(dataset)
        except Exception as e:
            log.error(e)
    return dataset_arr

def delete_datasource(self, name, company_id=None):
    datasource_record = Datasource.query.filter_by(
        company_id=company_id, name=name).first()
    if not Config()["force_datasource_removing"]:
        linked_models = Predictor.query.filter_by(
            company_id=company_id,
            datasource_id=datasource_record.id
        ).all()
        if linked_models:
            raise Exception(
                "Can't delete datasource '{}' because these models are linked to it: {}"
                .format(name, [model.name for model in linked_models])
            )
    session.query(Semaphor).filter_by(
        company_id=company_id,
        entity_id=datasource_record.id,
        entity_type='datasource'
    ).delete()
    session.delete(datasource_record)
    session.commit()
    self.fs_store.delete(f'datasource_{company_id}_{datasource_record.id}')
    try:
        shutil.rmtree(os.path.join(self.dir, f'{company_id}@@@@@{name}'))
    except Exception:
        pass

def _save(self):
    self._db_config = _null_to_empty(self._db_config)
    config_record = session.query(Configuration).filter_by(
        company_id=self.company_id).first()
    if config_record is not None:
        config_record.data = json.dumps(self._db_config)
    else:
        config_record = Configuration(
            company_id=self.company_id,
            data=json.dumps(self._db_config)
        )
        session.add(config_record)
    session.commit()